diff --git a/packages/ipfs-unixfs-importer/.aegir.js b/packages/ipfs-unixfs-importer/.aegir.js new file mode 100644 index 00000000..69129724 --- /dev/null +++ b/packages/ipfs-unixfs-importer/.aegir.js @@ -0,0 +1,7 @@ +'use strict' + +module.exports = { + karma: { + browserNoActivityTimeout: 500 * 1000 + } +} diff --git a/packages/ipfs-unixfs-importer/.gitignore b/packages/ipfs-unixfs-importer/.gitignore new file mode 100644 index 00000000..41396f3f --- /dev/null +++ b/packages/ipfs-unixfs-importer/.gitignore @@ -0,0 +1,45 @@ +docs +yarn.lock +**/node_modules/ +**/*.log +test/repo-tests* +**/bundle.js + +# Logs +logs +*.log + +coverage + +# Runtime data +pids +*.pid +*.seed + +# Directory for instrumented libs generated by jscoverage/JSCover +lib-cov + +# Coverage directory used by tools like istanbul +coverage +.nyc_output + +# Grunt intermediate storage (http://gruntjs.com/creating-plugins#storing-task-files) +.grunt + +# node-waf configuration +.lock-wscript + +build + +# Dependency directory +# https://www.npmjs.org/doc/misc/npm-faq.html#should-i-check-my-node_modules-folder-into-git +node_modules + +lib +dist +test/test-data/go-ipfs-repo/LOCK +test/test-data/go-ipfs-repo/LOG +test/test-data/go-ipfs-repo/LOG.old + +# while testing npm5 +package-lock.json diff --git a/packages/ipfs-unixfs-importer/.npmignore b/packages/ipfs-unixfs-importer/.npmignore new file mode 100644 index 00000000..70ea7a67 --- /dev/null +++ b/packages/ipfs-unixfs-importer/.npmignore @@ -0,0 +1,33 @@ + +.DS_Store +tests/repo-tests* + +# Logs +logs +*.log + +# Runtime data +pids +*.pid +*.seed + +# Directory for instrumented libs generated by jscoverage/JSCover +lib-cov + +# Coverage directory used by tools like istanbul +coverage + +# Grunt intermediate storage (http://gruntjs.com/creating-plugins#storing-task-files) +.grunt + +# node-waf configuration +.lock-wscript + +# Compiled binary addons (http://nodejs.org/api/addons.html) +build/Release + +# Dependency directory +# https://www.npmjs.org/doc/misc/npm-faq.html#should-i-check-my-node_modules-folder-into-git +node_modules + +test diff --git a/packages/ipfs-unixfs-importer/.travis.yml b/packages/ipfs-unixfs-importer/.travis.yml new file mode 100644 index 00000000..be3ad283 --- /dev/null +++ b/packages/ipfs-unixfs-importer/.travis.yml @@ -0,0 +1,40 @@ +language: node_js +cache: npm +stages: + - check + - test + - cov + +node_js: + - '12' + +os: + - linux + - osx + - windows + +script: npx nyc -s npm run test:node -- --bail +after_success: npx nyc report --reporter=text-lcov > coverage.lcov && npx codecov + +jobs: + include: + - stage: check + script: + - npx aegir commitlint --travis + - npx aegir dep-check + - npm run lint + + - stage: test + name: chrome + addons: + chrome: stable + script: npx aegir test -t browser -t webworker + + - stage: test + name: firefox + addons: + firefox: latest + script: npx aegir test -t browser -t webworker -- --browsers FirefoxHeadless + +notifications: + email: false diff --git a/packages/ipfs-unixfs-importer/CHANGELOG.md b/packages/ipfs-unixfs-importer/CHANGELOG.md new file mode 100644 index 00000000..bf0082ce --- /dev/null +++ b/packages/ipfs-unixfs-importer/CHANGELOG.md @@ -0,0 +1,928 @@ + +# [0.45.0](https://github.com/ipfs/js-ipfs-unixfs-importer/compare/v0.44.1...v0.45.0) (2020-02-04) + + +### Bug Fixes + +* only output unixfs things ([#49](https://github.com/ipfs/js-ipfs-unixfs-importer/issues/49)) ([8ecdcf2](https://github.com/ipfs/js-ipfs-unixfs-importer/commit/8ecdcf2)) + + +### BREAKING CHANGES + +* If your data is 
below the chunk size, and you have `rawLeaves` and +`reduceSingleLeafToSelf` set to true, you'll get a CID that resolves +to a bona fide UnixFS file back with metadata and all that good +stuff instead of a `dag-raw` node. + + + + +## [0.44.1](https://github.com/ipfs/js-ipfs-unixfs-importer/compare/v0.44.0...v0.44.1) (2020-02-03) + + +### Performance Improvements + +* small bl ([#52](https://github.com/ipfs/js-ipfs-unixfs-importer/issues/52)) ([3d461ce](https://github.com/ipfs/js-ipfs-unixfs-importer/commit/3d461ce)) + + + + +# [0.44.0](https://github.com/ipfs/js-ipfs-unixfs-importer/compare/v0.43.1...v0.44.0) (2020-01-15) + + +### Features + +* allow overriding of internal functions ([#48](https://github.com/ipfs/js-ipfs-unixfs-importer/issues/48)) ([0bff5f2](https://github.com/ipfs/js-ipfs-unixfs-importer/commit/0bff5f2)) + + + + +## [0.43.1](https://github.com/ipfs/js-ipfs-unixfs-importer/compare/v0.43.0...v0.43.1) (2020-01-09) + + +### Bug Fixes + +* specify default codec ([4b79619](https://github.com/ipfs/js-ipfs-unixfs-importer/commit/4b79619)) + + + + +# [0.43.0](https://github.com/ipfs/js-ipfs-unixfs-importer/compare/v0.42.0...v0.43.0) (2020-01-08) + + + +# [0.42.0](https://github.com/ipfs/js-ipfs-unixfs-importer/compare/v0.41.0...v0.42.0) (2019-11-27) + + +### Performance Improvements + +* avoid unnecessary buffer copy ([#40](https://github.com/ipfs/js-ipfs-unixfs-importer/issues/40)) ([b5e5b5a](https://github.com/ipfs/js-ipfs-unixfs-importer/commit/b5e5b5a15f8460c0effbedfd6aa39a1e594733df)) +* concurrent file import ([#41](https://github.com/ipfs/js-ipfs-unixfs-importer/issues/41)) ([68ac8cc](https://github.com/ipfs/js-ipfs-unixfs-importer/commit/68ac8cc233dbe73fcb8244911e09ed59789cddc9)), closes [#38](https://github.com/ipfs/js-ipfs-unixfs-importer/issues/38) + + + + +# [0.41.0](https://github.com/ipfs/js-ipfs-unixfs-importer/compare/v0.40.0...v0.41.0) (2019-11-22) + + +### Features + +* support storing metadata in unixfs nodes ([#39](https://github.com/ipfs/js-ipfs-unixfs-importer/issues/39)) ([a47c9ed](https://github.com/ipfs/js-ipfs-unixfs-importer/commit/a47c9ed)) + + + +# [0.40.0](https://github.com/ipfs/js-ipfs-unixfs-importer/compare/v0.39.11...v0.40.0) (2019-08-05) + + +### Bug Fixes + +* update to newest IPLD libraries ([#37](https://github.com/ipfs/js-ipfs-unixfs-importer/issues/37)) ([f79355f](https://github.com/ipfs/js-ipfs-unixfs-importer/commit/f79355f)) + + + +## [0.39.11](https://github.com/ipfs/js-ipfs-unixfs-importer/compare/v0.39.10...v0.39.11) (2019-06-06) + + +### Bug Fixes + +* validate rabin args ([#32](https://github.com/ipfs/js-ipfs-unixfs-importer/issues/32)) ([55c5dba](https://github.com/ipfs/js-ipfs-unixfs-importer/commit/55c5dba)) + + + + +## [0.39.10](https://github.com/ipfs/js-ipfs-unixfs-importer/compare/v0.39.9...v0.39.10) (2019-06-04) + + +### Bug Fixes + +* remove unused dep ([efa2ca2](https://github.com/ipfs/js-ipfs-unixfs-importer/commit/efa2ca2)) + + +### Features + +* use a rabin chunker in wasm ([#31](https://github.com/ipfs/js-ipfs-unixfs-importer/issues/31)) ([d4021db](https://github.com/ipfs/js-ipfs-unixfs-importer/commit/d4021db)) + + + + +## [0.39.9](https://github.com/ipfs/js-ipfs-unixfs-importer/compare/v0.39.8...v0.39.9) (2019-05-24) + + +### Features + +* adds js implementation of rabin chunker for windows and browser ([#30](https://github.com/ipfs/js-ipfs-unixfs-importer/issues/30)) ([542b3e4](https://github.com/ipfs/js-ipfs-unixfs-importer/commit/542b3e4)) + + + + +## 
[0.39.8](https://github.com/ipfs/js-ipfs-unixfs-importer/compare/v0.39.7...v0.39.8) (2019-05-24) + + +### Bug Fixes + +* make trickle dag importer compatible with go ([#29](https://github.com/ipfs/js-ipfs-unixfs-importer/issues/29)) ([01c7323](https://github.com/ipfs/js-ipfs-unixfs-importer/commit/01c7323)) + + + + +## [0.39.7](https://github.com/ipfs/js-ipfs-unixfs-importer/compare/v0.39.6...v0.39.7) (2019-05-23) + + +### Bug Fixes + +* remove leftpad ([#28](https://github.com/ipfs/js-ipfs-unixfs-importer/issues/28)) ([0aeb0f6](https://github.com/ipfs/js-ipfs-unixfs-importer/commit/0aeb0f6)) + + + + +## [0.39.6](https://github.com/ipfs/js-ipfs-unixfs-importer/compare/v0.39.5...v0.39.6) (2019-05-20) + + +### Bug Fixes + +* final trickle dag tests ([#27](https://github.com/ipfs/js-ipfs-unixfs-importer/issues/27)) ([72b8bc7](https://github.com/ipfs/js-ipfs-unixfs-importer/commit/72b8bc7)) + + + + +## [0.39.5](https://github.com/ipfs/js-ipfs-unixfs-importer/compare/v0.39.4...v0.39.5) (2019-05-20) + + + + +## [0.39.4](https://github.com/ipfs/js-ipfs-unixfs-importer/compare/v0.39.3...v0.39.4) (2019-05-20) + + +### Bug Fixes + +* add missing dependency async-iterator-all ([#26](https://github.com/ipfs/js-ipfs-unixfs-importer/issues/26)) ([83d4075](https://github.com/ipfs/js-ipfs-unixfs-importer/commit/83d4075)) + + + + +## [0.39.3](https://github.com/ipfs/js-ipfs-unixfs-importer/compare/v0.39.2...v0.39.3) (2019-05-18) + + + + +## [0.39.2](https://github.com/ipfs/js-ipfs-unixfs-importer/compare/v0.39.1...v0.39.2) (2019-05-17) + + +### Bug Fixes + +* move async-iterator-first out of dev deps ([7b76f4b](https://github.com/ipfs/js-ipfs-unixfs-importer/commit/7b76f4b)) + + + + +## [0.39.1](https://github.com/ipfs/js-ipfs-unixfs-importer/compare/v0.39.0...v0.39.1) (2019-05-17) + + + + +# [0.39.0](https://github.com/ipfs/js-ipfs-unixfs-importer/compare/v0.38.5...v0.39.0) (2019-05-17) + + +### Features + +* switch to async await ([#24](https://github.com/ipfs/js-ipfs-unixfs-importer/issues/24)) ([2a40ecb](https://github.com/ipfs/js-ipfs-unixfs-importer/commit/2a40ecb)) + + + + +## [0.38.5](https://github.com/ipfs/js-ipfs-unixfs-importer/compare/v0.38.4...v0.38.5) (2019-03-18) + + + + +## [0.38.4](https://github.com/ipfs/js-ipfs-unixfs-importer/compare/v0.38.3...v0.38.4) (2019-01-18) + + + + +## [0.38.3](https://github.com/ipfs/js-ipfs-unixfs-importer/compare/v0.38.2...v0.38.3) (2019-01-16) + + +### Bug Fixes + +* increase test timeouts for sharding ([#18](https://github.com/ipfs/js-ipfs-unixfs-importer/issues/18)) ([bc35f6f](https://github.com/ipfs/js-ipfs-unixfs-importer/commit/bc35f6f)) + + + + +## [0.38.2](https://github.com/ipfs/js-ipfs-unixfs-importer/compare/v0.38.1...v0.38.2) (2019-01-14) + + + + +## [0.38.1](https://github.com/ipfs/js-ipfs-unixfs-importer/compare/v0.38.0...v0.38.1) (2019-01-14) + + +### Bug Fixes + +* turn non-function progress callback into a noop ([#16](https://github.com/ipfs/js-ipfs-unixfs-importer/issues/16)) ([6d2c15d](https://github.com/ipfs/js-ipfs-unixfs-importer/commit/6d2c15d)) + + + + +# [0.38.0](https://github.com/ipfs/js-ipfs-unixfs-importer/compare/v0.37.3...v0.38.0) (2019-01-04) + + +### Bug Fixes + +* pull-stream/throughs/through is not pull-through ([df0abfa](https://github.com/ipfs/js-ipfs-unixfs-importer/commit/df0abfa)) + + +### Performance Improvements + +* do not create new buffers ([4ef5dbc](https://github.com/ipfs/js-ipfs-unixfs-importer/commit/4ef5dbc)) +* switch out pull-block for bl ([#12](https://github.com/ipfs/js-ipfs-unixfs-importer/issues/12)) 
([4e5b618](https://github.com/ipfs/js-ipfs-unixfs-importer/commit/4e5b618)) +* write files in parallel chunks, use a through instead of a map ([6a86d55](https://github.com/ipfs/js-ipfs-unixfs-importer/commit/6a86d55)) + + + + +## [0.37.3](https://github.com/ipfs/js-ipfs-unixfs-importer/compare/v0.37.2...v0.37.3) (2018-12-19) + + +### Bug Fixes + +* increase sharding timeouts ([69210b6](https://github.com/ipfs/js-ipfs-unixfs-importer/commit/69210b6)) + + + + +## [0.37.2](https://github.com/ipfs/js-ipfs-unixfs-importer/compare/v0.37.1...v0.37.2) (2018-12-04) + + +### Bug Fixes + +* fix regex to match files with square brackets ([986f945](https://github.com/ipfs/js-ipfs-unixfs-importer/commit/986f945)) + + + + +## [0.37.1](https://github.com/ipfs/js-ipfs-unixfs-importer/compare/v0.37.0...v0.37.1) (2018-12-03) + + +### Performance Improvements + +* deep require pull stream modules ([092b5b4](https://github.com/ipfs/js-ipfs-unixfs-importer/commit/092b5b4)) + + + + +# [0.37.0](https://github.com/ipfs/js-ipfs-unixfs-importer/compare/v0.36.0...v0.37.0) (2018-11-26) + + +### Features + +* export hash function from sharding ([7e24107](https://github.com/ipfs/js-ipfs-unixfs-importer/commit/7e24107)) + + + + +# [0.36.0](https://github.com/ipfs/js-ipfs-unixfs-importer/compare/v0.34.0...v0.36.0) (2018-11-23) + + +### Bug Fixes + +* support slashes in filenames ([3171fab](https://github.com/ipfs/js-ipfs-unixfs-importer/commit/3171fab)) + + +### Features + +* split hamt out into separate module, closes [#1](https://github.com/ipfs/js-ipfs-unixfs-importer/issues/1) ([bf216a9](https://github.com/ipfs/js-ipfs-unixfs-importer/commit/bf216a9)) + + + + +# [0.34.0](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.33.0...v0.34.0) (2018-11-12) + + +### Bug Fixes + +* updates ipld-dag-pb dep to version without .cid properties ([aa61cce](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/aa61cce)) + + + + +# [0.33.0](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.32.8...v0.33.0) (2018-10-27) + + +### Bug Fixes + +* fixes [#230](https://github.com/ipfs/js-ipfs-unixfs-engine/issues/230) by returning a through stream that emits the error instead of throwing it ([fdd8429](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/fdd8429)) + + + + +## [0.32.8](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.32.7...v0.32.8) (2018-10-25) + + + + +## [0.32.7](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.32.6...v0.32.7) (2018-10-12) + + +### Bug Fixes + +* return correct chunks of streams, fixes [#229](https://github.com/ipfs/js-ipfs-unixfs-engine/issues/229) ([362c685](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/362c685)) +* skip rabin tests on windows ([ea9e3c3](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/ea9e3c3)) + + + + +## [0.32.6](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.32.5...v0.32.6) (2018-10-12) + + +### Bug Fixes + +* do not use cid property of DAGNodes just yet ([7a2a308](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/7a2a308)) + + + + +## [0.32.5](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.32.4...v0.32.5) (2018-10-12) + + +### Bug Fixes + +* do not overwrite cid property of DAGNodes ([c2e38ae](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/c2e38ae)) +* make sure errors from unmarshalling are caught ([8b2335c](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/8b2335c)) + + + + +## [0.32.4](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.32.3...v0.32.4) (2018-08-23) + + +### Bug Fixes + +* build & export 
interop with go-ipfs for small file raw leaves ([11885fa](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/11885fa)) + + + + +## [0.32.3](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.32.2...v0.32.3) (2018-08-21) + + +### Bug Fixes + +* import with CID version 1 ([6ef929d](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/6ef929d)) +* typo ([c5cb38b](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/c5cb38b)) + + + + +## [0.32.2](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.32.1...v0.32.2) (2018-08-11) + + +### Bug Fixes + +* make rabin an optional dependency ([bef3152](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/bef3152)) +* skip first hash algorithm as it is no longer valid ([0b84b76](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/0b84b76)), closes [js-multihash#57](https://github.com/js-multihash/issues/57) + + + + +## [0.32.1](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.32.0...v0.32.1) (2018-08-08) + + +### Bug Fixes + +* do not emit empty buffers for non-empty files ([ccc4ad2](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/ccc4ad2)) + + + + +# [0.32.0](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.31.3...v0.32.0) (2018-08-08) + + +### Features + +* **importer:** add rabin fingerprinting chunk algorithm ([83a5feb](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/83a5feb)), closes [ipfs/js-ipfs#1283](https://github.com/ipfs/js-ipfs/issues/1283) + + + + +## [0.31.3](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.31.2...v0.31.3) (2018-07-24) + + +### Bug Fixes + +* return cids from builder ([0d3d3d8](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/0d3d3d8)) + + + + +## [0.31.2](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.31.1...v0.31.2) (2018-07-20) + + + + +## [0.31.1](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.31.0...v0.31.1) (2018-07-19) + + + + +# [0.31.0](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.30.1...v0.31.0) (2018-07-19) + + + + +## [0.30.1](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.30.0...v0.30.1) (2018-07-19) + + +### Features + +* support --raw-leaves ([7a29d83](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/7a29d83)), closes [ipfs/js-ipfs#1432](https://github.com/ipfs/js-ipfs/issues/1432) + + + + +# [0.30.0](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.29.0...v0.30.0) (2018-06-12) + + + + +# [0.29.0](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.28.1...v0.29.0) (2018-04-23) + + + + +## [0.28.1](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.28.0...v0.28.1) (2018-04-12) + + + + +# [0.28.0](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.27.0...v0.28.0) (2018-04-10) + + + + +# [0.27.0](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.26.0...v0.27.0) (2018-03-27) + + +### Features + +* exporter - support slicing streams stored in deeply nested DAGs ([#208](https://github.com/ipfs/js-ipfs-unixfs-engine/issues/208)) ([8568cd5](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/8568cd5)) + + + + +# [0.26.0](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.25.0...v0.26.0) (2018-03-22) + + +### Features + +* Adds begin/end byte slices to exporter ([#207](https://github.com/ipfs/js-ipfs-unixfs-engine/issues/207)) ([8e11d77](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/8e11d77)) + + + + +# [0.25.0](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.24.4...v0.25.0) (2018-03-20) + + +### Features + +* Add reader to read files or part of files as streams 
([833accf](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/833accf)) + + + + +## [0.24.4](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.24.3...v0.24.4) (2018-02-27) + + +### Bug Fixes + +* use "ipld" instead of "ipld-resolver" ([f4de206](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/f4de206)) + + + + +## [0.24.3](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.24.2...v0.24.3) (2018-02-27) + + + + +## [0.24.2](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.24.1...v0.24.2) (2017-12-15) + + + + +## [0.24.1](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.24.0...v0.24.1) (2017-11-12) + + + + +# [0.24.0](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.23.1...v0.24.0) (2017-11-12) + + +### Features + +* exporter maxDepth ([#197](https://github.com/ipfs/js-ipfs-unixfs-engine/issues/197)) ([211e4e3](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/211e4e3)) + + + + +## [0.23.1](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.23.0...v0.23.1) (2017-11-10) + + +### Features + +* windows interop ([#195](https://github.com/ipfs/js-ipfs-unixfs-engine/issues/195)) ([aa21ff3](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/aa21ff3)) + + + + +# [0.23.0](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.22.5...v0.23.0) (2017-11-07) + + +### Features + +* Include hash field for exported files ([#191](https://github.com/ipfs/js-ipfs-unixfs-engine/issues/191)) ([8b13957](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/8b13957)) + + + + +## [0.22.5](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.22.4...v0.22.5) (2017-09-08) + + +### Features + +* Use passed cidVersion option when writing to storage ([#185](https://github.com/ipfs/js-ipfs-unixfs-engine/issues/185)) ([0cd2d60](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/0cd2d60)) + + + + +## [0.22.4](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.22.3...v0.22.4) (2017-09-08) + + +### Features + +* allow specify hash algorithm for large files ([#184](https://github.com/ipfs/js-ipfs-unixfs-engine/issues/184)) ([69915da](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/69915da)) + + + + +## [0.22.3](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.22.2...v0.22.3) (2017-09-07) + + + + +## [0.22.2](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.22.1...v0.22.2) (2017-09-07) + + +### Features + +* Add `onlyHash` option ([#183](https://github.com/ipfs/js-ipfs-unixfs-engine/issues/183)) ([7450a65](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/7450a65)) +* adds call to progress bar function ([#179](https://github.com/ipfs/js-ipfs-unixfs-engine/issues/179)) ([ac6f722](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/ac6f722)) + + + + +## [0.22.1](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.22.0...v0.22.1) (2017-09-04) + + + + +# [0.22.0](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.21.0...v0.22.0) (2017-07-23) + + + + +# [0.21.0](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.20.0...v0.21.0) (2017-07-04) + + + + +# [0.20.0](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.19.2...v0.20.0) (2017-06-16) + + +### Features + +* subtree support ([#175](https://github.com/ipfs/js-ipfs-unixfs-engine/issues/175)) ([16b788c](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/16b788c)) + + + + +## [0.19.2](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.19.1...v0.19.2) (2017-05-25) + + +### Bug Fixes + +* **package:** update cids to version 0.5.0 
([59d6d0a](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/59d6d0a)) + + +### Features + +* dag-api direct support ([adaeb37](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/adaeb37)) + + + + +## [0.19.1](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.19.0...v0.19.1) (2017-03-29) + + +### Bug Fixes + +* adding a dir: leaf node gets replaced with dir if necessary ([1d682ec](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/1d682ec)) + + + + +# [0.19.0](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.18.0...v0.19.0) (2017-03-24) + + +### Bug Fixes + +* breaking the stack when importing ([993f746](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/993f746)) +* passing browser tests ([29b2740](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/29b2740)) +* using correct murmur3 codec name ([295d86e](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/295d86e)) +* using the new IPLD API ([a80f4d8](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/a80f4d8)) + + + + +# [0.18.0](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.17.0...v0.18.0) (2017-03-22) + + +### Bug Fixes + +* **package:** update ipld-dag-pb to version 0.10.0 ([#154](https://github.com/ipfs/js-ipfs-unixfs-engine/issues/154)) ([304ff25](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/304ff25)) +* **package:** update pull-pause to version 0.0.1 ([#153](https://github.com/ipfs/js-ipfs-unixfs-engine/issues/153)) ([4dd2143](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/4dd2143)) + + +### Features + +* upgrade to the next version of ipfs-block and blockservice ([0ca25b2](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/0ca25b2)) + + + + +# [0.17.0](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.16.1...v0.17.0) (2017-02-08) + + +### Features + +* update to latest ipld-resolver ([#137](https://github.com/ipfs/js-ipfs-unixfs-engine/issues/137)) ([211dfb6](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/211dfb6)) + + + + +## [0.16.1](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.16.0...v0.16.1) (2017-02-02) + + +### Bug Fixes + +* exporter: recurse correctly into subdirs ([#136](https://github.com/ipfs/js-ipfs-unixfs-engine/issues/136)) ([69c0d04](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/69c0d04)) + + + + +# [0.16.0](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.15.4...v0.16.0) (2017-02-02) + + +### Bug Fixes + +* **package:** update is-ipfs to version 0.3.0 ([#134](https://github.com/ipfs/js-ipfs-unixfs-engine/issues/134)) ([0063f9d](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/0063f9d)) + + + + +## [0.15.4](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.15.3...v0.15.4) (2017-01-31) + + +### Bug Fixes + +* case for empty file ([#132](https://github.com/ipfs/js-ipfs-unixfs-engine/issues/132)) ([fee55d1](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/fee55d1)) + + + + +## [0.15.3](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.15.2...v0.15.3) (2017-01-30) + + +### Bug Fixes + +* expect empty stream to not generate any nodes ([#131](https://github.com/ipfs/js-ipfs-unixfs-engine/issues/131)) ([7b054b6](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/7b054b6)) + + + + +## [0.15.2](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.15.1...v0.15.2) (2017-01-30) + + +### Bug Fixes + +* stop export visitor from trying to resolve leaf object ([#130](https://github.com/ipfs/js-ipfs-unixfs-engine/issues/130)) ([651f113](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/651f113)) + + + + +## 
[0.15.1](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.15.0...v0.15.1) (2017-01-29) + + +### Bug Fixes + +* **package:** update cids to version 0.4.0 ([#122](https://github.com/ipfs/js-ipfs-unixfs-engine/issues/122)) ([65a6759](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/65a6759)) + + + + +# [0.15.0](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.14.2...v0.15.0) (2017-01-11) + + + + +## [0.14.2](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.14.1...v0.14.2) (2016-12-13) + + + + +## [0.14.1](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.14.0...v0.14.1) (2016-12-08) + + + + +# [0.14.0](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.13.0...v0.14.0) (2016-11-24) + + +### Features + +* upgrade to latest dag-pb API ([#88](https://github.com/ipfs/js-ipfs-unixfs-engine/issues/88)) ([51d1245](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/51d1245)) + + + + +# [0.13.0](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.12.0...v0.13.0) (2016-11-03) + + + + +# [0.12.0](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.11.4...v0.12.0) (2016-10-28) + + +### Bug Fixes + +* **exporter:** add some parallel fetching of blocks where possible ([43503d4](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/43503d4)) + + +### Features + +* migrate importer to use IPLD Resolver and the new IPLD format ([89c3602](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/89c3602)) + + + + +## [0.11.4](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.11.3...v0.11.4) (2016-09-11) + + +### Features + +* **exporter:** implement recursive file export ([68e09a7](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/68e09a7)) + + + + +## [0.11.3](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.11.2...v0.11.3) (2016-09-09) + + +### Features + +* **exporter:** return file sizes ([73cf78a](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/73cf78a)) + + + + +## [0.11.2](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.11.1...v0.11.2) (2016-09-09) + + + + +## [0.11.1](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.11.0...v0.11.1) (2016-09-09) + + + + +# [0.11.0](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.10.2...v0.11.0) (2016-09-08) + + +### Bug Fixes + +* **tests:** ignore ordering ([f8d1b2a](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/f8d1b2a)) + + + + +## [0.10.2](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.10.1...v0.10.2) (2016-08-09) + + + + +## [0.10.1](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.10.0...v0.10.1) (2016-08-09) + + + + +# [0.10.0](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.9.0...v0.10.0) (2016-06-28) + + + + +# [0.9.0](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.8.0...v0.9.0) (2016-05-27) + + + + +# [0.8.0](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.7.0...v0.8.0) (2016-05-21) + + + + +# [0.7.0](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.6.1...v0.7.0) (2016-05-21) + + + + +## [0.6.1](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.6.0...v0.6.1) (2016-05-05) + + + + +# [0.6.0](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.5.0...v0.6.0) (2016-05-03) + + + + +# [0.5.0](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.4.5...v0.5.0) (2016-04-26) + + + + +## [0.4.5](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.4.4...v0.4.5) (2016-04-24) + + + + +## [0.4.4](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.4.3...v0.4.4) (2016-04-24) + + + + +## 
[0.4.3](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.4.2...v0.4.3) (2016-04-24) + + +### Bug Fixes + +* clean up dependencies ([a3bee40](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/a3bee40)) +* **importer:** cleanup smaller issues ([eab17fe](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/eab17fe)) + + + + +## [0.4.2](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.4.1...v0.4.2) (2016-04-19) + + + + +## [0.4.1](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.4.0...v0.4.1) (2016-04-19) + + + + +# [0.4.0](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.3.3...v0.4.0) (2016-04-19) + + + + +## [0.3.3](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.3.2...v0.3.3) (2016-03-22) + + + + +## [0.3.2](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.3.1...v0.3.2) (2016-03-22) + + + + +## [0.3.1](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.3.0...v0.3.1) (2016-03-22) + + + + +# [0.3.0](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.2.0...v0.3.0) (2016-03-21) + + + + +# [0.2.0](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.1.0...v0.2.0) (2016-02-17) + + + + +# 0.1.0 (2016-02-12) + + + diff --git a/packages/ipfs-unixfs-importer/LICENSE b/packages/ipfs-unixfs-importer/LICENSE new file mode 100644 index 00000000..b7cf9f52 --- /dev/null +++ b/packages/ipfs-unixfs-importer/LICENSE @@ -0,0 +1,22 @@ +The MIT License (MIT) + +Copyright (c) 2016 David Dias + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
+
diff --git a/packages/ipfs-unixfs-importer/README.md b/packages/ipfs-unixfs-importer/README.md
new file mode 100644
index 00000000..da32517c
--- /dev/null
+++ b/packages/ipfs-unixfs-importer/README.md
@@ -0,0 +1,186 @@
+# ipfs-unixfs-importer
+
+[![](https://img.shields.io/badge/made%20by-Protocol%20Labs-blue.svg?style=flat-square)](http://ipn.io)
+[![](https://img.shields.io/badge/project-IPFS-blue.svg?style=flat-square)](http://ipfs.io/)
+[![](https://img.shields.io/badge/freenode-%23ipfs-blue.svg?style=flat-square)](http://webchat.freenode.net/?channels=%23ipfs)
+[![standard-readme compliant](https://img.shields.io/badge/standard--readme-OK-green.svg?style=flat-square)](https://github.com/RichardLitt/standard-readme)
+[![Build Status](https://flat.badgen.net/travis/ipfs/js-ipfs-unixfs-importer)](https://travis-ci.com/ipfs/js-ipfs-unixfs-importer)
+[![Codecov](https://codecov.io/gh/ipfs/js-ipfs-unixfs-importer/branch/master/graph/badge.svg)](https://codecov.io/gh/ipfs/js-ipfs-unixfs-importer)
+[![Dependency Status](https://david-dm.org/ipfs/js-ipfs-unixfs-importer.svg?style=flat-square)](https://david-dm.org/ipfs/js-ipfs-unixfs-importer)
+[![js-standard-style](https://img.shields.io/badge/code%20style-standard-brightgreen.svg?style=flat-square)](https://github.com/feross/standard)
+![](https://img.shields.io/badge/npm-%3E%3D3.0.0-orange.svg?style=flat-square)
+![](https://img.shields.io/badge/Node.js-%3E%3D8.0.0-orange.svg?style=flat-square)
+
+> JavaScript implementation of the layout and chunking mechanisms used by IPFS to handle Files
+
+## Lead Maintainer
+
+[Alex Potsides](https://github.com/achingbrain)
+
+## Table of Contents
+
+- [Install](#install)
+- [Usage](#usage)
+  - [Example](#example)
+  - [API](#api)
+    - [const import = importer(source, ipld [, options])](#const-import--importersource-ipld--options)
+- [Overriding internals](#overriding-internals)
+- [Contribute](#contribute)
+- [License](#license)
+
+## Install
+
+```
+> npm install ipfs-unixfs-importer
+```
+
+## Usage
+
+### Example
+
+Let's create a little directory to import:
+
+```sh
+> cd /tmp
+> mkdir foo
+> echo 'hello' > foo/bar
+> echo 'world' > foo/quux
+```
+
+And write the importing logic:
+
+```js
+const importer = require('ipfs-unixfs-importer')
+const fs = require('fs')
+
+// Import the files under /tmp/foo
+const source = [{
+  path: '/tmp/foo/bar',
+  content: fs.createReadStream('/tmp/foo/bar')
+}, {
+  path: '/tmp/foo/quux',
+  content: fs.createReadStream('/tmp/foo/quux')
+}]
+
+// You need to create and pass an ipld-resolver instance
+// https://github.com/ipld/js-ipld-resolver
+for await (const entry of importer(source, ipld)) {
+  console.info(entry)
+}
+```
+
+When run, metadata about the DAGNodes in the created tree is printed, ending with the root:
+
+```js
+{
+  cid: CID, // see https://github.com/multiformats/js-cid
+  path: 'tmp/foo/bar',
+  unixfs: UnixFS // see https://github.com/ipfs/js-ipfs-unixfs
+}
+{
+  cid: CID, // see https://github.com/multiformats/js-cid
+  path: 'tmp/foo/quux',
+  unixfs: UnixFS // see https://github.com/ipfs/js-ipfs-unixfs
+}
+{
+  cid: CID, // see https://github.com/multiformats/js-cid
+  path: 'tmp/foo',
+  unixfs: UnixFS // see https://github.com/ipfs/js-ipfs-unixfs
+}
+{
+  cid: CID, // see https://github.com/multiformats/js-cid
+  path: 'tmp',
+  unixfs: UnixFS // see https://github.com/ipfs/js-ipfs-unixfs
+}
+```
+
+#### API
+
+```js
+const importer = require('ipfs-unixfs-importer')
+```
+
+#### const import = importer(source, ipld [, options])
+
+The `import` function returns an async iterator. It consumes a source async iterator
that yields objects of the form:
+
+```js
+{
+  path: 'a name',
+  content: (Buffer or iterator emitting Buffers),
+  mtime: (Number representing seconds since (positive) or before (negative) the Unix Epoch),
+  mode: (Number representing ugo-rwx, setuid, setguid and sticky bit)
+}
+```
+
+`import` will output file info objects as files are stored in IPFS. When a node's stats are emitted, that node is guaranteed to have been written.
+
+`ipld` is an instance of the [`IPLD Resolver`](https://github.com/ipld/js-ipld-resolver) or the [`js-ipfs` `dag api`](https://github.com/ipfs/interface-ipfs-core/blob/master/SPEC/DAG.md).
+
+The input's file paths and directory structure will be preserved in the created [`dag-pb`](https://github.com/ipld/js-ipld-dag-pb) nodes.
+
+`options` is a JavaScript object that may include the following keys:
+
+- `wrap` (boolean, defaults to false): if true, a wrapping node will be created
+- `shardSplitThreshold` (positive integer, defaults to 1000): the number of directory entries above which we decide to use a sharding directory builder (instead of the default flat one)
+- `chunker` (string, defaults to `"fixed"`): the chunking strategy. Supports:
+  - `fixed`
+  - `rabin`
+- `avgChunkSize` (positive integer, defaults to `262144`): the average chunk size (rabin chunker only)
+- `minChunkSize` (positive integer): the minimum chunk size (rabin chunker only)
+- `maxChunkSize` (positive integer, defaults to `262144`): the maximum chunk size
+- `strategy` (string, defaults to `"balanced"`): the DAG builder strategy name. Supports:
+  - `flat`: flat list of chunks
+  - `balanced`: builds a balanced tree
+  - `trickle`: builds [a trickle tree](https://github.com/ipfs/specs/pull/57#issuecomment-265205384)
+- `maxChildrenPerNode` (positive integer, defaults to `174`): the maximum children per node for the `balanced` and `trickle` DAG builder strategies
+- `layerRepeat` (positive integer, defaults to 4): (only applicable to the `trickle` DAG builder strategy) the maximum repetition of parent nodes for each layer of the tree
+- `reduceSingleLeafToSelf` (boolean, defaults to `true`): optimization that, when a file's content fits in a single leaf node, uses that node as the file root instead of wrapping it in a parent node
+- `hamtHashFn` (async function(string) Buffer): a function that hashes file names to create HAMT shards
+- `hamtBucketBits` (positive integer, defaults to `8`): the number of bits at each bucket of the HAMT
+- `progress` (function): a function that will be called with the byte length of chunks as a file is added to IPFS
+- `onlyHash` (boolean, defaults to false): only chunk and hash - do not write to disk
+- `hashAlg` (string): multihash hashing algorithm to use
+- `cidVersion` (integer, defaults to 0): the CID version to use when storing the data (storage keys are based on the CID, _including_ its version)
+- `rawLeaves` (boolean, defaults to false): when a file would span multiple DAGNodes, if this is true the leaf nodes will not be wrapped in `UnixFS` protobufs and will instead contain the raw file bytes
+- `leafType` (string, defaults to `'file'`): what type of UnixFS node leaves should be - can be `'file'` or `'raw'` (ignored when `rawLeaves` is `true`)
+- `blockWriteConcurrency` (positive integer, defaults to 10): how many blocks to hash and write to the block store concurrently. For small numbers of large files this should be high (e.g. 50).
+- `fileImportConcurrency` (number, defaults to 50): how many files to import concurrently. For large numbers of small files this should be high (e.g. 50).
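+
+For example, an import call combining a few of these options might look like this - a sketch rather than a complete program: it assumes an `ipld` instance is already available and that the example file path exists:
+
+```js
+const importer = require('ipfs-unixfs-importer')
+const fs = require('fs')
+
+const source = [{
+  path: 'example.txt',
+  content: fs.createReadStream('/tmp/example.txt')
+}]
+
+const options = {
+  strategy: 'trickle',
+  rawLeaves: true,
+  maxChunkSize: 262144,
+  wrap: true
+}
+
+// the wrapping directory is emitted last, so the final entry's cid is the root
+for await (const entry of importer(source, ipld, options)) {
+  console.info(entry.path, entry.cid.toString())
+}
+```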
+
+## Overriding internals
+
+Several aspects of the importer are overridable by specifying functions as part of the options object with these keys:
+
+- `chunkValidator` (function): Optional function that supports the signature `async function * (source, options)`
+  - This function takes input from the `content` field of imported entries and should yield `Buffer` objects constructed from that content, throwing an `Error` if the content cannot be converted
+- `chunker` (function): Optional function that supports the signature `async function * (source, options)` where `source` is an async generator and `options` is an options object
+  - It should yield `Buffer` objects.
+- `bufferImporter` (function): Optional function that supports the signature `async function * (entry, source, ipld, options)`
+  - This function should read `Buffer`s from `source` and persist them using `ipld.put` or similar
+  - `entry` is the `{ path, content }` entry, `source` is an async generator that yields Buffers
+  - It should yield functions that return a Promise that resolves to an object with the properties `{ cid, unixfs, size }` where `cid` is a [CID], `unixfs` is a [UnixFS] entry and `size` is a `Number` that represents the serialized size of the [IPLD] node that holds the buffer data.
+  - Values will be pulled from this generator in parallel - the amount of parallelisation is controlled by the `blockWriteConcurrency` option (default: 10)
+- `dagBuilder` (function): Optional function that supports the signature `async function * (source, ipld, options)`
+  - This function should read `{ path, content }` entries from `source` and turn them into DAGs
+  - It should yield a `function` that returns a `Promise` that resolves to `{ cid, path, unixfs, node }` where `cid` is a `CID`, `path` is a string, `unixfs` is a UnixFS entry and `node` is a `DAGNode`.
+  - Values will be pulled from this generator in parallel - the amount of parallelisation is controlled by the `fileImportConcurrency` option (default: 50)
+- `treeBuilder` (function): Optional function that supports the signature `async function * (source, ipld, options)`
+  - This function should read `{ cid, path, unixfs, node }` entries from `source` and place them in a directory structure
+  - It should yield an object with the properties `{ cid, path, unixfs, size }` where `cid` is a `CID`, `path` is a string, `unixfs` is a UnixFS entry and `size` is a `Number`.
+
+[ipld-resolver instance]: https://github.com/ipld/js-ipld-resolver
+[UnixFS]: https://github.com/ipfs/specs/tree/master/unixfs
+[IPLD]: https://github.com/ipld/js-ipld
+[CID]: https://github.com/multiformats/js-cid
+
+## Contribute
+
+Feel free to join in. All welcome. Open an [issue](https://github.com/ipfs/js-ipfs-unixfs-importer/issues)!
+
+This repository falls under the IPFS [Code of Conduct](https://github.com/ipfs/community/blob/master/code-of-conduct.md).
+ +[![](https://cdn.rawgit.com/jbenet/contribute-ipfs-gif/master/img/contribute.gif)](https://github.com/ipfs/community/blob/master/contributing.md) + +## License + +[MIT](LICENSE) diff --git a/packages/ipfs-unixfs-importer/package.json b/packages/ipfs-unixfs-importer/package.json new file mode 100644 index 00000000..48524778 --- /dev/null +++ b/packages/ipfs-unixfs-importer/package.json @@ -0,0 +1,94 @@ +{ + "name": "ipfs-unixfs-importer", + "version": "0.45.0", + "description": "JavaScript implementation of the UnixFs importer used by IPFS", + "leadMaintainer": "Alex Potsides ", + "main": "src/index.js", + "browser": { + "fs": false + }, + "scripts": { + "test": "aegir test", + "test:node": "aegir test -t node", + "test:browser": "aegir test -t browser", + "test:webworker": "aegir test -t webworker", + "build": "aegir build", + "lint": "aegir lint", + "release": "aegir release", + "release-minor": "aegir release --type minor", + "release-major": "aegir release --type major", + "coverage": "nyc -s npm run test:node && nyc report --reporter=html", + "dep-check": "aegir dep-check" + }, + "repository": { + "type": "git", + "url": "git+https://github.com/ipfs/js-ipfs-unixfs-importer.git" + }, + "keywords": [ + "IPFS" + ], + "license": "MIT", + "bugs": { + "url": "https://github.com/ipfs/js-ipfs-unixfs-importer/issues" + }, + "engines": { + "node": ">=8.0.0", + "npm": ">=3.0.0" + }, + "homepage": "https://github.com/ipfs/js-ipfs-unixfs-importer#readme", + "devDependencies": { + "aegir": "^20.0.0", + "chai": "^4.2.0", + "cids": "~0.7.1", + "deep-extend": "~0.6.0", + "detect-node": "^2.0.4", + "dirty-chai": "^2.0.1", + "ipfs-unixfs-exporter": "^0.40.0", + "ipld": "^0.25.0", + "ipld-in-memory": "^3.0.0", + "it-buffer-stream": "^1.0.0", + "it-last": "^1.0.0", + "nyc": "^15.0.0", + "sinon": "^8.0.4" + }, + "dependencies": { + "bl": "^4.0.0", + "err-code": "^2.0.0", + "hamt-sharding": "^1.0.0", + "ipfs-unixfs": "^0.3.0", + "ipld-dag-pb": "^0.18.0", + "it-all": "^1.0.1", + "it-batch": "^1.0.3", + "it-first": "^1.0.1", + "it-parallel-batch": "^1.0.3", + "merge-options": "^2.0.0", + "multicodec": "^1.0.0", + "multihashing-async": "^0.8.0", + "rabin-wasm": "~0.0.8" + }, + "contributors": [ + "Alan Shaw ", + "Alan Shaw ", + "Alex Potsides ", + "Arpit Agarwal ", + "Bernard Mordan ", + "Dan Ordille ", + "David Dias ", + "Diogo Silva ", + "Francisco Baio Dias ", + "Friedel Ziegelmayer ", + "Greenkeeper ", + "Hugo Dias ", + "Hugo Dias ", + "Marcin Rataj ", + "Pedro Teixeira ", + "Richard Littauer ", + "Richard Schneider ", + "Stephen Whitmore ", + "Volker Mische ", + "greenkeeper[bot] ", + "jbenet ", + "nginnever ", + "ᴠɪᴄᴛᴏʀ ʙᴊᴇʟᴋʜᴏʟᴍ " + ] +} diff --git a/packages/ipfs-unixfs-importer/src/chunker/fixed-size.js b/packages/ipfs-unixfs-importer/src/chunker/fixed-size.js new file mode 100644 index 00000000..6cef6606 --- /dev/null +++ b/packages/ipfs-unixfs-importer/src/chunker/fixed-size.js @@ -0,0 +1,39 @@ +'use strict' + +const BufferList = require('bl/BufferList') + +module.exports = async function * fixedSizeChunker (source, options) { + let bl = new BufferList() + let currentLength = 0 + let emitted = false + const maxChunkSize = options.maxChunkSize + + for await (const buffer of source) { + bl.append(buffer) + + currentLength += buffer.length + + while (currentLength >= maxChunkSize) { + yield bl.slice(0, maxChunkSize) + emitted = true + + // throw away consumed bytes + if (maxChunkSize === bl.length) { + bl = new BufferList() + currentLength = 0 + } else { + const newBl = new BufferList() + 
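+        // shallowSlice references the same underlying buffers rather than copying them,
+        // so the leftover bytes can be carried forward cheaply in a fresh BufferList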
newBl.append(bl.shallowSlice(maxChunkSize)) + bl = newBl + + // update our offset + currentLength -= maxChunkSize + } + } + } + + if (!emitted || currentLength) { + // return any remaining bytes or an empty buffer + yield bl.slice(0, currentLength) + } +} diff --git a/packages/ipfs-unixfs-importer/src/chunker/index.js b/packages/ipfs-unixfs-importer/src/chunker/index.js new file mode 100644 index 00000000..ec2c494b --- /dev/null +++ b/packages/ipfs-unixfs-importer/src/chunker/index.js @@ -0,0 +1,18 @@ +'use strict' + +const errCode = require('err-code') + +const chunkers = { + fixed: require('../chunker/fixed-size'), + rabin: require('../chunker/rabin') +} + +module.exports = (type, source, options) => { + const chunker = chunkers[type] + + if (!chunker) { + throw errCode(new Error(`Unknkown chunker named ${type}`), 'ERR_UNKNOWN_CHUNKER') + } + + return chunker(source, options) +} diff --git a/packages/ipfs-unixfs-importer/src/chunker/rabin.js b/packages/ipfs-unixfs-importer/src/chunker/rabin.js new file mode 100644 index 00000000..6f1a0775 --- /dev/null +++ b/packages/ipfs-unixfs-importer/src/chunker/rabin.js @@ -0,0 +1,73 @@ +'use strict' + +const BufferList = require('bl/BufferList') +const { create } = require('rabin-wasm') +const errcode = require('err-code') + +module.exports = async function * rabinChunker (source, options) { + const rabin = jsRabin() + + let min, max, avg + + if (options.minChunkSize && options.maxChunkSize && options.avgChunkSize) { + avg = options.avgChunkSize + min = options.minChunkSize + max = options.maxChunkSize + } else if (!options.avgChunkSize) { + throw errcode(new Error('please specify an average chunk size'), 'ERR_INVALID_AVG_CHUNK_SIZE') + } else { + avg = options.avgChunkSize + min = avg / 3 + max = avg + (avg / 2) + } + + // validate min/max/avg in the same way as go + if (min < 16) { + throw errcode(new Error('rabin min must be greater than 16'), 'ERR_INVALID_MIN_CHUNK_SIZE') + } + + if (max < min) { + max = min + } + + if (avg < min) { + avg = min + } + + const sizepow = Math.floor(Math.log2(avg)) + + for await (const chunk of rabin(source, { + min: min, + max: max, + bits: sizepow, + window: options.window, + polynomial: options.polynomial + })) { + yield chunk + } +} + +const jsRabin = () => { + return async function * (source, options) { + const r = await create(options.bits, options.min, options.max, options.window) + const buffers = new BufferList() + + for await (const chunk of source) { + buffers.append(chunk) + + const sizes = r.fingerprint(chunk) + + for (let i = 0; i < sizes.length; i++) { + var size = sizes[i] + var buf = buffers.slice(0, size) + buffers.consume(size) + + yield buf + } + } + + if (buffers.length) { + yield buffers.slice(0) + } + } +} diff --git a/packages/ipfs-unixfs-importer/src/dag-builder/dir.js b/packages/ipfs-unixfs-importer/src/dag-builder/dir.js new file mode 100644 index 00000000..42cce150 --- /dev/null +++ b/packages/ipfs-unixfs-importer/src/dag-builder/dir.js @@ -0,0 +1,28 @@ +'use strict' + +const UnixFS = require('ipfs-unixfs') +const persist = require('../utils/persist') +const { + DAGNode +} = require('ipld-dag-pb') + +const dirBuilder = async (item, ipld, options) => { + const unixfs = new UnixFS({ + type: 'directory', + mtime: item.mtime, + mode: item.mode + }) + + const node = new DAGNode(unixfs.marshal(), []) + const cid = await persist(node, ipld, options) + const path = item.path + + return { + cid, + path, + unixfs, + size: node.size + } +} + +module.exports = dirBuilder diff --git 
a/packages/ipfs-unixfs-importer/src/dag-builder/file/balanced.js b/packages/ipfs-unixfs-importer/src/dag-builder/file/balanced.js new file mode 100644 index 00000000..732f7f76 --- /dev/null +++ b/packages/ipfs-unixfs-importer/src/dag-builder/file/balanced.js @@ -0,0 +1,23 @@ +'use strict' + +const batch = require('it-batch') + +async function * balanced (source, reduce, options) { + yield await reduceToParents(source, reduce, options) +} + +async function reduceToParents (source, reduce, options) { + const roots = [] + + for await (const chunked of batch(source, options.maxChildrenPerNode)) { + roots.push(await reduce(chunked)) + } + + if (roots.length > 1) { + return reduceToParents(roots, reduce, options) + } + + return roots[0] +} + +module.exports = balanced diff --git a/packages/ipfs-unixfs-importer/src/dag-builder/file/buffer-importer.js b/packages/ipfs-unixfs-importer/src/dag-builder/file/buffer-importer.js new file mode 100644 index 00000000..88d89bde --- /dev/null +++ b/packages/ipfs-unixfs-importer/src/dag-builder/file/buffer-importer.js @@ -0,0 +1,50 @@ +'use strict' + +const UnixFS = require('ipfs-unixfs') +const persist = require('../../utils/persist') +const { + DAGNode +} = require('ipld-dag-pb') + +async function * bufferImporter (file, source, ipld, options) { + for await (const buffer of source) { + yield async () => { + options.progress(buffer.length) + let node + let unixfs + let size + + const opts = { + ...options + } + + if (options.rawLeaves) { + node = buffer + size = buffer.length + + opts.codec = 'raw' + opts.cidVersion = 1 + } else { + unixfs = new UnixFS({ + type: options.leafType, + data: buffer, + mtime: file.mtime, + mode: file.mode + }) + + node = new DAGNode(unixfs.marshal()) + size = node.size + } + + const cid = await persist(node, ipld, opts) + + return { + cid: cid, + unixfs, + size + } + } + } +} + +module.exports = bufferImporter diff --git a/packages/ipfs-unixfs-importer/src/dag-builder/file/flat.js b/packages/ipfs-unixfs-importer/src/dag-builder/file/flat.js new file mode 100644 index 00000000..1ac77ef6 --- /dev/null +++ b/packages/ipfs-unixfs-importer/src/dag-builder/file/flat.js @@ -0,0 +1,7 @@ +'use strict' + +const all = require('it-all') + +module.exports = async function * (source, reduce) { + yield await reduce(await all(source)) +} diff --git a/packages/ipfs-unixfs-importer/src/dag-builder/file/index.js b/packages/ipfs-unixfs-importer/src/dag-builder/file/index.js new file mode 100644 index 00000000..f44c5e51 --- /dev/null +++ b/packages/ipfs-unixfs-importer/src/dag-builder/file/index.js @@ -0,0 +1,147 @@ +'use strict' + +const errCode = require('err-code') +const UnixFS = require('ipfs-unixfs') +const persist = require('../../utils/persist') +const { + DAGNode, + DAGLink +} = require('ipld-dag-pb') +const all = require('it-all') +const parallelBatch = require('it-parallel-batch') +const mc = require('multicodec') + +const dagBuilders = { + flat: require('./flat'), + balanced: require('./balanced'), + trickle: require('./trickle') +} + +async function * buildFileBatch (file, source, ipld, options) { + let count = -1 + let previous + let bufferImporter + + if (typeof options.bufferImporter === 'function') { + bufferImporter = options.bufferImporter + } else { + bufferImporter = require('./buffer-importer') + } + + for await (const entry of parallelBatch(bufferImporter(file, source, ipld, options), options.blockWriteConcurrency)) { + count++ + + if (count === 0) { + previous = entry + continue + } else if (count === 1) { + yield previous + 
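+      // more than one entry has been seen, so this file will not be a single leaf -
+      // clear the held-back entry so it is not marked `single` and re-emitted below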
previous = null + } + + yield entry + } + + if (previous) { + previous.single = true + yield previous + } +} + +const reduce = (file, ipld, options) => { + return async function (leaves) { + if (leaves.length === 1 && leaves[0].single && options.reduceSingleLeafToSelf) { + const leaf = leaves[0] + + if (leaf.cid.codec === 'raw') { + // only one leaf node which is a buffer + const buffer = await ipld.get(leaf.cid) + + leaf.unixfs = new UnixFS({ + type: 'file', + mtime: file.mtime, + mode: file.mode, + data: buffer + }) + + const node = new DAGNode(leaf.unixfs.marshal()) + + leaf.cid = await ipld.put(node, mc.DAG_PB, options) + leaf.size = node.size + } + + return { + cid: leaf.cid, + path: file.path, + unixfs: leaf.unixfs, + size: leaf.size + } + } + + // create a parent node and add all the leaves + const f = new UnixFS({ + type: 'file', + mtime: file.mtime, + mode: file.mode + }) + + const links = leaves + .filter(leaf => { + if (leaf.cid.codec === 'raw' && leaf.size) { + return true + } + + if (!leaf.unixfs.data && leaf.unixfs.fileSize()) { + return true + } + + return Boolean(leaf.unixfs.data.length) + }) + .map((leaf) => { + if (leaf.cid.codec === 'raw') { + // node is a leaf buffer + f.addBlockSize(leaf.size) + + return new DAGLink(leaf.name, leaf.size, leaf.cid) + } + + if (!leaf.unixfs.data) { + // node is an intermediate node + f.addBlockSize(leaf.unixfs.fileSize()) + } else { + // node is a unixfs 'file' leaf node + f.addBlockSize(leaf.unixfs.data.length) + } + + return new DAGLink(leaf.name, leaf.size, leaf.cid) + }) + + const node = new DAGNode(f.marshal(), links) + const cid = await persist(node, ipld, options) + + return { + cid, + path: file.path, + unixfs: f, + size: node.size + } + } +} + +const fileBuilder = async (file, source, ipld, options) => { + const dagBuilder = dagBuilders[options.strategy] + + if (!dagBuilder) { + throw errCode(new Error(`Unknown importer build strategy name: ${options.strategy}`), 'ERR_BAD_STRATEGY') + } + + const roots = await all(dagBuilder(buildFileBatch(file, source, ipld, options), reduce(file, ipld, options), options)) + + if (roots.length > 1) { + throw errCode(new Error('expected a maximum of 1 roots and got ' + roots.length), 'ETOOMANYROOTS') + } + + return roots[0] +} + +module.exports = fileBuilder diff --git a/packages/ipfs-unixfs-importer/src/dag-builder/file/trickle.js b/packages/ipfs-unixfs-importer/src/dag-builder/file/trickle.js new file mode 100644 index 00000000..5149ff0b --- /dev/null +++ b/packages/ipfs-unixfs-importer/src/dag-builder/file/trickle.js @@ -0,0 +1,150 @@ +'use strict' + +const batch = require('it-batch') + +module.exports = function * trickleReduceToRoot (source, reduce, options) { + yield trickleStream(source, reduce, options) +} + +async function trickleStream (source, reduce, options) { + let root + let iteration = 0 + let maxDepth = 1 + let subTree = root = new Root(options.layerRepeat) + + for await (const layer of batch(source, options.maxChildrenPerNode)) { + if (subTree.isFull()) { + if (subTree !== root) { + root.addChild(await subTree.reduce(reduce)) + } + + if (iteration && iteration % options.layerRepeat === 0) { + maxDepth++ + } + + subTree = new SubTree(maxDepth, options.layerRepeat, iteration) + + iteration++ + } + + subTree.append(layer) + } + + if (subTree && subTree !== root) { + root.addChild(await subTree.reduce(reduce)) + } + + return root.reduce(reduce) +} + +class SubTree { + constructor (maxDepth, layerRepeat, iteration) { + this.maxDepth = maxDepth + this.layerRepeat = layerRepeat + 
this.currentDepth = 1 + this.iteration = iteration + + this.root = this.node = this.parent = { + children: [], + depth: this.currentDepth, + maxDepth, + maxChildren: (this.maxDepth - this.currentDepth) * this.layerRepeat + } + } + + isFull () { + if (!this.root.data) { + return false + } + + if (this.currentDepth < this.maxDepth && this.node.maxChildren) { + // can descend + this._addNextNodeToParent(this.node) + + return false + } + + // try to find new node from node.parent + const distantRelative = this._findParent(this.node, this.currentDepth) + + if (distantRelative) { + this._addNextNodeToParent(distantRelative) + + return false + } + + return true + } + + _addNextNodeToParent (parent) { + this.parent = parent + + // find site for new node + const nextNode = { + children: [], + depth: parent.depth + 1, + parent, + maxDepth: this.maxDepth, + maxChildren: Math.floor(parent.children.length / this.layerRepeat) * this.layerRepeat + } + + parent.children.push(nextNode) + + this.currentDepth = nextNode.depth + this.node = nextNode + } + + append (layer) { + this.node.data = layer + } + + reduce (reduce) { + return this._reduce(this.root, reduce) + } + + async _reduce (node, reduce) { + let children = [] + + if (node.children.length) { + children = await Promise.all( + node.children + .filter(child => child.data) + .map(child => this._reduce(child, reduce)) + ) + } + + return reduce(node.data.concat(children)) + } + + _findParent (node, depth) { + const parent = node.parent + + if (!parent || parent.depth === 0) { + return + } + + if (parent.children.length === parent.maxChildren || !parent.maxChildren) { + // this layer is full, may be able to traverse to a different branch + return this._findParent(parent, depth) + } + + return parent + } +} + +class Root extends SubTree { + constructor (layerRepeat) { + super(0, layerRepeat) + + this.root.depth = 0 + this.currentDepth = 1 + } + + addChild (child) { + this.root.children.push(child) + } + + reduce (reduce) { + return reduce(this.root.data.concat(this.root.children)) + } +} diff --git a/packages/ipfs-unixfs-importer/src/dag-builder/index.js b/packages/ipfs-unixfs-importer/src/dag-builder/index.js new file mode 100644 index 00000000..a55888d4 --- /dev/null +++ b/packages/ipfs-unixfs-importer/src/dag-builder/index.js @@ -0,0 +1,58 @@ +'use strict' + +const dirBuilder = require('./dir') +const fileBuilder = require('./file') + +async function * dagBuilder (source, ipld, options) { + for await (const entry of source) { + if (entry.path) { + if (entry.path.substring(0, 2) === './') { + options.wrapWithDirectory = true + } + + entry.path = entry.path + .split('/') + .filter(path => path && path !== '.') + .join('/') + } + + if (entry.content) { + let source = entry.content + + // wrap in iterator if it is array-like or not an iterator + if ((!source[Symbol.asyncIterator] && !source[Symbol.iterator]) || source.length !== undefined) { + source = { + [Symbol.iterator]: function * () { + yield entry.content + } + } + } + + let chunker + + if (typeof options.chunker === 'function') { + chunker = options.chunker + } else if (options.chunker === 'rabin') { + chunker = require('../chunker/rabin') + } else { + chunker = require('../chunker/fixed-size') + } + + let chunkValidator + + if (typeof options.chunkValidator === 'function') { + chunkValidator = options.chunkValidator + } else { + chunkValidator = require('./validate-chunks') + } + + // item is a file + yield () => fileBuilder(entry, chunker(chunkValidator(source, options), options), ipld, options) 
+ } else { + // item is a directory + yield () => dirBuilder(entry, ipld, options) + } + } +} + +module.exports = dagBuilder diff --git a/packages/ipfs-unixfs-importer/src/dag-builder/validate-chunks.js b/packages/ipfs-unixfs-importer/src/dag-builder/validate-chunks.js new file mode 100644 index 00000000..bf3037d3 --- /dev/null +++ b/packages/ipfs-unixfs-importer/src/dag-builder/validate-chunks.js @@ -0,0 +1,22 @@ +'use strict' + +const errCode = require('err-code') + +// make sure the content only emits buffer-a-likes +async function * validateChunks (source) { + for await (const content of source) { + if (content.length === undefined) { + throw errCode(new Error('Content was invalid'), 'ERR_INVALID_CONTENT') + } + + if (typeof content === 'string' || content instanceof String) { + yield Buffer.from(content, 'utf8') + } else if (Array.isArray(content)) { + yield Buffer.from(content) + } else { + yield content + } + } +} + +module.exports = validateChunks diff --git a/packages/ipfs-unixfs-importer/src/dir-flat.js b/packages/ipfs-unixfs-importer/src/dir-flat.js new file mode 100644 index 00000000..50866044 --- /dev/null +++ b/packages/ipfs-unixfs-importer/src/dir-flat.js @@ -0,0 +1,92 @@ +'use strict' + +const { + DAGLink, + DAGNode +} = require('ipld-dag-pb') +const UnixFS = require('ipfs-unixfs') +const Dir = require('./dir') +const persist = require('./utils/persist') + +class DirFlat extends Dir { + constructor (props, options) { + super(props, options) + this._children = {} + } + + put (name, value) { + this.cid = undefined + this.size = undefined + + this._children[name] = value + } + + get (name) { + return this._children[name] + } + + childCount () { + return Object.keys(this._children).length + } + + directChildrenCount () { + return this.childCount() + } + + onlyChild () { + return this._children[Object.keys(this._children)[0]] + } + + * eachChildSeries () { + const keys = Object.keys(this._children) + + for (let i = 0; i < keys.length; i++) { + const key = keys[i] + + yield { + key: key, + child: this._children[key] + } + } + } + + async * flush (path, ipld) { + const children = Object.keys(this._children) + const links = [] + + for (let i = 0; i < children.length; i++) { + let child = this._children[children[i]] + + if (typeof child.flush === 'function') { + for await (const entry of child.flush(child.path, ipld)) { + child = entry + + yield child + } + } + + links.push(new DAGLink(children[i], child.size, child.cid)) + } + + const unixfs = new UnixFS({ + type: 'directory', + mtime: this.mtime, + mode: this.mode + }) + + const node = new DAGNode(unixfs.marshal(), links) + const cid = await persist(node, ipld, this.options) + + this.cid = cid + this.size = node.size + + yield { + cid, + unixfs, + path, + size: node.size + } + } +} + +module.exports = DirFlat diff --git a/packages/ipfs-unixfs-importer/src/dir-sharded.js b/packages/ipfs-unixfs-importer/src/dir-sharded.js new file mode 100644 index 00000000..e2959845 --- /dev/null +++ b/packages/ipfs-unixfs-importer/src/dir-sharded.js @@ -0,0 +1,162 @@ +'use strict' + +const { + DAGLink, + DAGNode +} = require('ipld-dag-pb') +const UnixFS = require('ipfs-unixfs') +const multihashing = require('multihashing-async') +const Dir = require('./dir') +const persist = require('./utils/persist') +const Bucket = require('hamt-sharding') +const mergeOptions = require('merge-options').bind({ ignoreUndefined: true }) + +const hashFn = async function (value) { + const hash = await multihashing(Buffer.from(value, 'utf8'), 'murmur3-128') + + // 
Multihashing inserts a preamble of 2 bytes. Remove it.
+  // Also, murmur3 outputs 128 bits but, accidentally, IPFS Go's
+  // implementation only uses the first 64, so we must do the same
+  // for parity.
+  const justHash = hash.slice(2, 10)
+  const length = justHash.length
+  const result = Buffer.alloc(length)
+  // TODO: invert buffer because that's how Go impl does it
+  for (let i = 0; i < length; i++) {
+    result[length - i - 1] = justHash[i]
+  }
+
+  return result
+}
+hashFn.code = 0x22 // TODO: get this from multihashing-async?
+
+const defaultOptions = {
+  hamtHashFn: hashFn,
+  hamtBucketBits: 8
+}
+
+class DirSharded extends Dir {
+  constructor (props, options) {
+    options = mergeOptions(defaultOptions, options)
+
+    super(props, options)
+
+    this._bucket = Bucket({
+      hashFn: options.hamtHashFn,
+      bits: options.hamtBucketBits
+    })
+  }
+
+  async put (name, value) {
+    await this._bucket.put(name, value)
+  }
+
+  get (name) {
+    return this._bucket.get(name)
+  }
+
+  childCount () {
+    return this._bucket.leafCount()
+  }
+
+  directChildrenCount () {
+    return this._bucket.childrenCount()
+  }
+
+  onlyChild () {
+    return this._bucket.onlyChild()
+  }
+
+  async * eachChildSeries () {
+    for await (const { key, value } of this._bucket.eachLeafSeries()) {
+      yield {
+        key,
+        child: value
+      }
+    }
+  }
+
+  async * flush (path, ipld) {
+    for await (const entry of flush(path, this._bucket, ipld, this, this.options)) {
+      yield entry
+    }
+  }
+}
+
+module.exports = DirSharded
+
+module.exports.hashFn = hashFn
+
+async function * flush (path, bucket, ipld, shardRoot, options) {
+  const children = bucket._children
+  const links = []
+
+  for (let i = 0; i < children.length; i++) {
+    const child = children.get(i)
+
+    if (!child) {
+      continue
+    }
+
+    const labelPrefix = i.toString(16).toUpperCase().padStart(2, '0')
+
+    if (Bucket.isBucket(child)) {
+      let shard
+
+      for await (const subShard of await flush('', child, ipld, null, options)) {
+        shard = subShard
+      }
+
+      links.push(new DAGLink(labelPrefix, shard.size, shard.cid))
+    } else if (typeof child.value.flush === 'function') {
+      const dir = child.value
+      let flushedDir
+
+      for await (const entry of dir.flush(dir.path, ipld)) {
+        flushedDir = entry
+
+        yield flushedDir
+      }
+
+      const label = labelPrefix + child.key
+      links.push(new DAGLink(label, flushedDir.size, flushedDir.cid))
+    } else {
+      const value = child.value
+
+      if (!value.node) {
+        if (value.cid) {
+          value.node = await ipld.get(value.cid)
+        } else {
+          continue
+        }
+      }
+
+      const label = labelPrefix + child.key
+      const size = value.node.length || value.node.size || value.node.Size
+
+      links.push(new DAGLink(label, size, value.cid))
+    }
+  }
+
+  // go-ipfs uses little endian, that's why we have to
+  // reverse the bit field before storing it
+  const data = Buffer.from(children.bitField().reverse())
+  const dir = new UnixFS({
+    type: 'hamt-sharded-directory',
+    data,
+    fanout: bucket.tableSize(),
+    hashType: options.hamtHashFn.code,
+    mtime: shardRoot && shardRoot.mtime,
+    mode: shardRoot && shardRoot.mode
+  })
+
+  const node = new DAGNode(dir.marshal(), links)
+  const cid = await persist(node, ipld, options)
+
+  yield {
+    cid,
+    unixfs: dir,
+    path,
+    size: node.size
+  }
+}
diff --git a/packages/ipfs-unixfs-importer/src/dir.js b/packages/ipfs-unixfs-importer/src/dir.js
new file mode 100644
index 00000000..24a1023c
--- /dev/null
+++ b/packages/ipfs-unixfs-importer/src/dir.js
@@ -0,0 +1,8 @@
+'use strict'
+
+module.exports = class Dir {
+  constructor (props, options) {
+    this.options = options || {}
+
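+    // props holds the tree-building state for this entry - callers such as
+    // tree-builder.js and flat-to-shard.js pass things like root, dir, parent,
+    // parentKey, path, dirty, flat, mtime and mode, all copied onto the
+    // instance below
+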
Object.assign(this, props) + } +} diff --git a/packages/ipfs-unixfs-importer/src/flat-to-shard.js b/packages/ipfs-unixfs-importer/src/flat-to-shard.js new file mode 100644 index 00000000..1617b4dc --- /dev/null +++ b/packages/ipfs-unixfs-importer/src/flat-to-shard.js @@ -0,0 +1,47 @@ +'use strict' + +const DirSharded = require('./dir-sharded') + +module.exports = async function flatToShard (child, dir, threshold, options) { + let newDir = dir + + if (dir.flat && dir.directChildrenCount() >= threshold) { + newDir = await convertToShard(dir, options) + } + + const parent = newDir.parent + + if (parent) { + if (newDir !== dir) { + if (child) { + child.parent = newDir + } + + await parent.put(newDir.parentKey, newDir) + } + + return flatToShard(newDir, parent, threshold, options) + } + + return newDir +} + +async function convertToShard (oldDir, options) { + const newDir = new DirSharded({ + root: oldDir.root, + dir: true, + parent: oldDir.parent, + parentKey: oldDir.parentKey, + path: oldDir.path, + dirty: oldDir.dirty, + flat: false, + mtime: oldDir.mtime, + mode: oldDir.mode + }, options) + + for await (const { key, child } of oldDir.eachChildSeries()) { + await newDir.put(key, child) + } + + return newDir +} diff --git a/packages/ipfs-unixfs-importer/src/index.js b/packages/ipfs-unixfs-importer/src/index.js new file mode 100644 index 00000000..052acff3 --- /dev/null +++ b/packages/ipfs-unixfs-importer/src/index.js @@ -0,0 +1,85 @@ +'use strict' + +const parallelBatch = require('it-parallel-batch') +const mergeOptions = require('merge-options').bind({ ignoreUndefined: true }) + +const defaultOptions = { + chunker: 'fixed', + strategy: 'balanced', // 'flat', 'trickle' + rawLeaves: false, + onlyHash: false, + reduceSingleLeafToSelf: true, + codec: 'dag-pb', + hashAlg: 'sha2-256', + leafType: 'file', // 'raw' + cidVersion: 0, + progress: () => () => {}, + shardSplitThreshold: 1000, + fileImportConcurrency: 50, + blockWriteConcurrency: 10, + minChunkSize: 262144, + maxChunkSize: 262144, + avgChunkSize: 262144, + window: 16, + polynomial: 17437180132763653, // https://github.com/ipfs/go-ipfs-chunker/blob/d0125832512163708c0804a3cda060e21acddae4/rabin.go#L11 + maxChildrenPerNode: 174, + layerRepeat: 4, + wrapWithDirectory: false, + pin: true, + recursive: false, + hidden: false, + preload: true, + chunkValidator: null, + importBuffer: null +} + +module.exports = async function * (source, ipld, options = {}) { + const opts = mergeOptions(defaultOptions, options) + + if (options.cidVersion > 0 && options.rawLeaves === undefined) { + // if the cid version is 1 or above, use raw leaves as this is + // what go does. + opts.rawLeaves = true + } + + if (options.hashAlg !== undefined && options.rawLeaves === undefined) { + // if a non-default hash alg has been specified, use raw leaves as this is + // what go does. 
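+    // (utils/persist.js below will additionally bump the CID version to 1
+    // for any hash algorithm other than sha2-256)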
+    opts.rawLeaves = true
+  }
+
+  // go-ipfs trickle dag defaults to unixfs raw leaves, balanced dag defaults to file leaves
+  if (options.strategy === 'trickle') {
+    opts.leafType = 'raw'
+    opts.reduceSingleLeafToSelf = false
+  }
+
+  if (options.format) {
+    opts.codec = options.format
+  }
+
+  let dagBuilder
+
+  if (typeof options.dagBuilder === 'function') {
+    dagBuilder = options.dagBuilder
+  } else {
+    dagBuilder = require('./dag-builder')
+  }
+
+  let treeBuilder
+
+  if (typeof options.treeBuilder === 'function') {
+    treeBuilder = options.treeBuilder
+  } else {
+    treeBuilder = require('./tree-builder')
+  }
+
+  for await (const entry of treeBuilder(parallelBatch(dagBuilder(source, ipld, opts), opts.fileImportConcurrency), ipld, opts)) {
+    yield {
+      cid: entry.cid,
+      path: entry.path,
+      unixfs: entry.unixfs,
+      size: entry.size
+    }
+  }
+}
diff --git a/packages/ipfs-unixfs-importer/src/tree-builder.js b/packages/ipfs-unixfs-importer/src/tree-builder.js
new file mode 100644
index 00000000..feb9f42d
--- /dev/null
+++ b/packages/ipfs-unixfs-importer/src/tree-builder.js
@@ -0,0 +1,100 @@
+'use strict'
+
+const DirFlat = require('./dir-flat')
+const flatToShard = require('./flat-to-shard')
+const Dir = require('./dir')
+const toPathComponents = require('./utils/to-path-components')
+const errCode = require('err-code')
+const first = require('it-first')
+
+async function addToTree (elem, tree, options) {
+  const pathElems = toPathComponents(elem.path || '')
+  const lastIndex = pathElems.length - 1
+  let parent = tree
+  let currentPath = ''
+
+  for (let i = 0; i < pathElems.length; i++) {
+    const pathElem = pathElems[i]
+
+    currentPath += `${currentPath ? '/' : ''}${pathElem}`
+
+    const last = (i === lastIndex)
+    parent.dirty = true
+    parent.cid = null
+    parent.size = null
+
+    if (last) {
+      await parent.put(pathElem, elem)
+      tree = await flatToShard(null, parent, options.shardSplitThreshold, options)
+    } else {
+      let dir = await parent.get(pathElem)
+
+      if (!dir || !(dir instanceof Dir)) {
+        dir = new DirFlat({
+          dir: true,
+          parent: parent,
+          parentKey: pathElem,
+          path: currentPath,
+          dirty: true,
+          flat: true,
+          mtime: dir && dir.unixfs && dir.unixfs.mtime,
+          mode: dir && dir.unixfs && dir.unixfs.mode
+        }, options)
+      }
+
+      await parent.put(pathElem, dir)
+
+      parent = dir
+    }
+  }
+
+  return tree
+}
+
+async function * treeBuilder (source, ipld, options) {
+  let tree = new DirFlat({
+    root: true,
+    dir: true,
+    path: '',
+    dirty: true,
+    flat: true
+  }, options)
+
+  for await (const entry of source) {
+    if (!entry) {
+      continue
+    }
+
+    tree = await addToTree(entry, tree, options)
+
+    if (!entry.unixfs || !entry.unixfs.isDirectory()) {
+      yield entry
+    }
+  }
+
+  if (!options.wrapWithDirectory) {
+    if (tree.childCount() > 1) {
+      throw errCode(new Error('detected more than one root'), 'ERR_MORE_THAN_ONE_ROOT')
+    }
+
+    const unwrapped = await first(tree.eachChildSeries())
+
+    if (!unwrapped) {
+      return
+    }
+
+    tree = unwrapped.child
+  }
+
+  if (!(tree instanceof Dir)) {
+    if (tree && tree.unixfs && tree.unixfs.isDirectory()) {
+      yield tree
+    }
+
+    return
+  }
+
+  yield * tree.flush(tree.path, ipld)
+}
+
+module.exports = treeBuilder
diff --git a/packages/ipfs-unixfs-importer/src/utils/persist.js b/packages/ipfs-unixfs-importer/src/utils/persist.js
new file mode 100644
index 00000000..e6970b65
--- /dev/null
+++ b/packages/ipfs-unixfs-importer/src/utils/persist.js
@@ -0,0 +1,33 @@
+'use strict'
+
+const mh = require('multihashing-async').multihash
+const mc = require('multicodec')
+
+const persist = (node,
ipld, options) => { + if (!options.codec && node.length) { + options.cidVersion = 1 + options.codec = 'raw' + } + + if (!options.codec) { + options.codec = 'dag-pb' + } + + if (isNaN(options.hashAlg)) { + options.hashAlg = mh.names[options.hashAlg] + } + + if (options.hashAlg !== mh.names['sha2-256']) { + options.cidVersion = 1 + } + + if (options.format) { + options.codec = options.format + } + + const format = mc[options.codec.toUpperCase().replace(/-/g, '_')] + + return ipld.put(node, format, options) +} + +module.exports = persist diff --git a/packages/ipfs-unixfs-importer/src/utils/to-path-components.js b/packages/ipfs-unixfs-importer/src/utils/to-path-components.js new file mode 100644 index 00000000..5e826272 --- /dev/null +++ b/packages/ipfs-unixfs-importer/src/utils/to-path-components.js @@ -0,0 +1,11 @@ +'use strict' + +const toPathComponents = (path = '') => { + // split on / unless escaped with \ + return (path + .trim() + .match(/([^\\^/]|\\\/)+/g) || []) + .filter(Boolean) +} + +module.exports = toPathComponents diff --git a/packages/ipfs-unixfs-importer/test/benchmark.spec.js b/packages/ipfs-unixfs-importer/test/benchmark.spec.js new file mode 100644 index 00000000..fae3f483 --- /dev/null +++ b/packages/ipfs-unixfs-importer/test/benchmark.spec.js @@ -0,0 +1,70 @@ +/* eslint-env mocha */ +'use strict' + +const importer = require('../src') + +const IPLD = require('ipld') +const inMemory = require('ipld-in-memory') +const bufferStream = require('it-buffer-stream') +const all = require('it-all') + +const REPEATS = 10 +const FILE_SIZE = Math.pow(2, 20) * 500 // 500MB +const CHUNK_SIZE = 65536 + +describe.skip('benchmark', function () { + this.timeout(30 * 1000) + + let ipld + + before(async () => { + ipld = await inMemory(IPLD) + }) + + const times = [] + + after(() => { + console.info('Percent\tms') // eslint-disable-line no-console + times.forEach((time, index) => { + console.info(`${index}\t${parseInt(time / REPEATS)}`) // eslint-disable-line no-console + }) + }) + + for (let i = 0; i < REPEATS; i++) { + it(`run ${i}`, async () => { // eslint-disable-line no-loop-func + this.timeout(0) + + const size = FILE_SIZE + let read = 0 + let lastDate = Date.now() + let lastPercent = 0 + + const options = { + progress: (prog) => { + read += prog + + const percent = parseInt((read / size) * 100) + + if (percent > lastPercent) { + times[percent] = (times[percent] || 0) + (Date.now() - lastDate) + + lastDate = Date.now() + lastPercent = percent + } + } + } + + const buf = Buffer.alloc(CHUNK_SIZE).fill(0) + + await all(importer([{ + path: '200Bytes.txt', + content: bufferStream(size, { + chunkSize: CHUNK_SIZE, + generator: () => { + return buf + } + }) + }], ipld, options)) + }) + } +}) diff --git a/packages/ipfs-unixfs-importer/test/builder-balanced.spec.js b/packages/ipfs-unixfs-importer/test/builder-balanced.spec.js new file mode 100644 index 00000000..17242a31 --- /dev/null +++ b/packages/ipfs-unixfs-importer/test/builder-balanced.spec.js @@ -0,0 +1,70 @@ +/* eslint-env mocha */ +'use strict' + +const chai = require('chai') +chai.use(require('dirty-chai')) +const expect = chai.expect +const builder = require('../src/dag-builder/file/balanced') +const all = require('it-all') + +function reduce (leaves) { + if (leaves.length > 1) { + return { children: leaves } + } else { + return leaves[0] + } +} + +const options = { + maxChildrenPerNode: 3 +} + +describe('builder: balanced', () => { + it('reduces one value into itself', async () => { + const source = [1] + + const result = await 
all(builder(source, reduce, options)) + + expect(result).to.deep.equal(source) + }) + + it('reduces 3 values into parent', async () => { + const source = [1, 2, 3] + + const result = await all(builder(source, reduce, options)) + + expect(result).to.deep.equal([{ + children: [1, 2, 3] + }]) + }) + + it('obeys max children per node', async () => { + const source = [1, 2, 3, 4] + + const result = await all(builder(source, reduce, options)) + + expect(result).to.deep.equal([{ + children: [{ + children: [1, 2, 3] + }, + 4 + ] + }]) + }) + + it('refolds 2 parent nodes', async () => { + const source = [1, 2, 3, 4, 5, 6, 7] + + const result = await all(builder(source, reduce, options)) + + expect(result).to.deep.equal([{ + children: [{ + children: [1, 2, 3] + }, { + children: [4, 5, 6] + }, + 7 + ] + }]) + }) +}) diff --git a/packages/ipfs-unixfs-importer/test/builder-dir-sharding.spec.js b/packages/ipfs-unixfs-importer/test/builder-dir-sharding.spec.js new file mode 100644 index 00000000..b52b07b8 --- /dev/null +++ b/packages/ipfs-unixfs-importer/test/builder-dir-sharding.spec.js @@ -0,0 +1,294 @@ +/* eslint-env mocha */ +'use strict' + +const importer = require('../src') +const exporter = require('ipfs-unixfs-exporter') + +const chai = require('chai') +chai.use(require('dirty-chai')) +const expect = chai.expect +const IPLD = require('ipld') +const inMemory = require('ipld-in-memory') +const all = require('it-all') +const last = require('it-last') + +describe('builder: directory sharding', () => { + let ipld + + before(async () => { + ipld = await inMemory(IPLD) + }) + + describe('basic dirbuilder', () => { + it('yields a non-sharded dir', async () => { + const content = Buffer.from('i have the best bytes') + const nodes = await all(importer([{ + path: 'a/b', + content + }], ipld, { + shardSplitThreshold: Infinity // never shard + })) + + expect(nodes.length).to.equal(2) + + expect(nodes[0].path).to.equal('a/b') + expect(nodes[1].path).to.equal('a') + + const dirNode = await exporter(nodes[1].cid, ipld) + expect(dirNode.unixfs.type).to.equal('directory') + + const fileNode = await exporter(nodes[0].cid, ipld) + expect(fileNode.unixfs.type).to.equal('file') + expect(Buffer.concat(await all(fileNode.content()))).to.deep.equal(content) + }) + + it('yields a sharded dir', async () => { + const nodes = await all(importer([{ + path: 'a/b', + content: Buffer.from('i have the best bytes') + }], ipld, { + shardSplitThreshold: 0 // always shard + })) + + expect(nodes.length).to.equal(2) + expect(nodes[0].path).to.equal('a/b') + expect(nodes[1].path).to.equal('a') + + const node = await exporter(nodes[1].cid, ipld) + + expect(node.unixfs.type).to.equal('hamt-sharded-directory') + }) + + it('exporting unsharded hash results in the correct files', async () => { + const content = 'i have the best bytes' + const nodes = await all(importer([{ + path: 'a/b', + content: Buffer.from(content) + }], ipld, { + shardSplitThreshold: Infinity // never shard + })) + + const nonShardedHash = nodes[1].cid + + const dir = await exporter(nonShardedHash, ipld) + const files = await all(dir.content()) + + expect(files.length).to.equal(1) + + const expectedHash = nonShardedHash.toBaseEncodedString() + + expect(dir.path).to.be.eql(expectedHash) + expect(dir.cid.toBaseEncodedString()).to.be.eql(expectedHash) + expect(files[0].path).to.be.eql(expectedHash + '/b') + expect(files[0].unixfs.fileSize()).to.be.eql(content.length) + + const fileContent = Buffer.concat(await all(files[0].content())) + + 
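+      // the exporter names the root entry after the base-encoded CID it was
+      // asked to resolve (hence the expectedHash comparisons above) and the
+      // exported bytes should round-trip unchanged
+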
expect(fileContent.toString()).to.equal(content) + }) + + it('exporting sharded hash results in the correct files', async () => { + const content = 'i have the best bytes' + const nodes = await all(importer([{ + path: 'a/b', + content: Buffer.from(content) + }], ipld, { + shardSplitThreshold: 0 // always shard + })) + + const shardedHash = nodes[1].cid + + const dir = await exporter(shardedHash, ipld) + const files = await all(dir.content()) + + expect(files.length).to.equal(1) + + const expectedHash = shardedHash.toBaseEncodedString() + + expect(dir.path).to.be.eql(expectedHash) + expect(dir.cid.toBaseEncodedString()).to.be.eql(expectedHash) + expect(files[0].path).to.be.eql(expectedHash + '/b') + expect(files[0].unixfs.fileSize()).to.be.eql(content.length) + + const fileContent = Buffer.concat(await all(files[0].content())) + + expect(fileContent.toString()).to.equal(content) + }) + }) + + describe('big dir', function () { + this.timeout(30 * 1000) + + const maxDirs = 2000 + + it('imports a big dir', async () => { + const source = { + [Symbol.iterator]: function * () { + for (let i = 0; i < maxDirs; i++) { + yield { + path: 'big/' + i.toString().padStart(4, '0'), + content: Buffer.from(i.toString()) + } + } + } + } + + const nodes = await all(importer(source, ipld)) + + expect(nodes.length).to.equal(maxDirs + 1) + const last = nodes[nodes.length - 1] + expect(last.path).to.equal('big') + }) + + it('exports a big dir', async () => { + const source = { + [Symbol.iterator]: function * () { + for (let i = 0; i < maxDirs; i++) { + yield { + path: 'big/' + i.toString().padStart(4, '0'), + content: Buffer.from(i.toString()) + } + } + } + } + + const nodes = await all(importer(source, ipld)) + + expect(nodes.length).to.equal(maxDirs + 1) // files plus the containing directory + + const dir = await exporter(nodes[nodes.length - 1].cid, ipld) + + for await (const entry of dir.content()) { + const content = Buffer.concat(await all(entry.content())) + expect(content.toString()).to.equal(parseInt(entry.name, 10).toString()) + } + }) + }) + + describe('big nested dir', function () { + this.timeout(450 * 1000) + + const maxDirs = 2000 + const maxDepth = 3 + let rootHash + + before(async () => { + const source = { + [Symbol.iterator]: function * () { + let pending = maxDirs + let pendingDepth = maxDepth + let i = 0 + let depth = 1 + + while (pendingDepth && pending) { + i++ + const dir = [] + + for (let d = 0; d < depth; d++) { + dir.push('big') + } + + yield { + path: dir.concat(i.toString().padStart(4, '0')).join('/'), + content: Buffer.from(i.toString()) + } + + pending-- + if (!pending) { + pendingDepth-- + pending = maxDirs + i = 0 + depth++ + } + } + } + } + + const node = await last(importer(source, ipld)) + expect(node.path).to.equal('big') + + rootHash = node.cid + }) + + it('imports a big dir', async () => { + const dir = await exporter(rootHash, ipld) + + const verifyContent = async (node) => { + if (node.unixfs.type === 'file') { + const bufs = await all(node.content()) + const content = Buffer.concat(bufs) + expect(content.toString()).to.equal(parseInt(node.name, 10).toString()) + } else { + for await (const entry of node.content()) { + await verifyContent(entry) + } + } + } + + await verifyContent(dir) + }) + + it('exports a big dir', async () => { + const collectContent = async (node, entries = {}) => { + if (node.unixfs.type === 'file') { + entries[node.path] = { + content: Buffer.concat(await all(node.content())).toString() + } + } else { + entries[node.path] = node + + for await (const 
entry of node.content()) { + await collectContent(entry, entries) + } + } + + return entries + } + + const eachPath = (path) => { + if (!index) { + // first dir + if (depth === 1) { + expect(path).to.equal(dir.cid.toBaseEncodedString()) + } + + const entry = entries[path] + expect(entry).to.exist() + expect(entry.content).to.not.be.a('string') + } else { + // dir entries + const pathElements = path.split('/') + expect(pathElements.length).to.equal(depth + 1) + const lastElement = pathElements[pathElements.length - 1] + expect(lastElement).to.equal(index.toString().padStart(4, '0')) + expect(entries[path].content).to.equal(index.toString()) + } + index++ + if (index > maxDirs) { + index = 0 + depth++ + } + } + + const dir = await exporter(rootHash, ipld) + + const entries = await collectContent(dir) + let index = 0 + let depth = 1 + + const paths = Object.keys(entries).sort() + expect(paths.length).to.equal(maxDepth * maxDirs + maxDepth) + paths.forEach(eachPath) + }) + + it('exports a big dir with subpath', async () => { + const exportHash = rootHash.toBaseEncodedString() + '/big/big/2000' + + const node = await exporter(exportHash, ipld) + expect(node.path).to.equal(exportHash) + + const content = Buffer.concat(await all(node.content())) + expect(content.toString()).to.equal('2000') + }) + }) +}) diff --git a/packages/ipfs-unixfs-importer/test/builder-flat.spec.js b/packages/ipfs-unixfs-importer/test/builder-flat.spec.js new file mode 100644 index 00000000..e3f0339e --- /dev/null +++ b/packages/ipfs-unixfs-importer/test/builder-flat.spec.js @@ -0,0 +1,32 @@ +/* eslint-env mocha */ +'use strict' + +const chai = require('chai') +chai.use(require('dirty-chai')) +const expect = chai.expect +const builder = require('../src/dag-builder/file/flat') +const all = require('it-all') + +function reduce (leaves) { + if (leaves.length > 1) { + return { children: leaves } + } else { + return leaves[0] + } +} + +describe('builder: flat', () => { + it('reduces one value into itself', async () => { + const source = [1] + const result = await all(builder(source, reduce)) + + expect(result).to.be.eql([1]) + }) + + it('reduces 2 values into parent', async () => { + const source = [1, 2] + const result = await all(builder(source, reduce)) + + expect(result).to.be.eql([{ children: [1, 2] }]) + }) +}) diff --git a/packages/ipfs-unixfs-importer/test/builder-only-hash.spec.js b/packages/ipfs-unixfs-importer/test/builder-only-hash.spec.js new file mode 100644 index 00000000..e7e7642c --- /dev/null +++ b/packages/ipfs-unixfs-importer/test/builder-only-hash.spec.js @@ -0,0 +1,47 @@ +/* eslint-env mocha */ +'use strict' + +const chai = require('chai') +chai.use(require('dirty-chai')) +const expect = chai.expect +const IPLD = require('ipld') +const inMemory = require('ipld-in-memory') +const builder = require('../src/dag-builder') +const all = require('it-all') + +describe('builder: onlyHash', () => { + let ipld + + before(async () => { + ipld = await inMemory(IPLD) + }) + + it('will only chunk and hash if passed an "onlyHash" option', async () => { + const nodes = await all(builder([{ + path: 'foo.txt', + content: Buffer.from([0, 1, 2, 3, 4]) + }], ipld, { + onlyHash: true, + chunker: 'fixed', + strategy: 'balanced', + progress: () => {}, + leafType: 'file', + reduceSingleLeafToSelf: true, + format: 'dag-pb', + hashAlg: 'sha2-256', + wrap: true, + maxChunkSize: 1024, + maxChildrenPerNode: 254 + })) + + expect(nodes.length).to.equal(1) + + try { + await ipld.get((await nodes[0]()).cid) + + throw new Error('Should have 
errored') + } catch (err) { + expect(err.code).to.equal('ERR_NOT_FOUND') + } + }) +}) diff --git a/packages/ipfs-unixfs-importer/test/builder-trickle-dag.spec.js b/packages/ipfs-unixfs-importer/test/builder-trickle-dag.spec.js new file mode 100644 index 00000000..ba6c239a --- /dev/null +++ b/packages/ipfs-unixfs-importer/test/builder-trickle-dag.spec.js @@ -0,0 +1,574 @@ +/* eslint-env mocha */ +'use strict' + +const chai = require('chai') +chai.use(require('dirty-chai')) +const expect = chai.expect +const builder = require('../src/dag-builder/file/trickle') +const all = require('it-all') + +const createValues = (max) => { + const output = [] + + for (let i = 0; i < max; i++) { + output.push(i) + } + + return output +} + +function reduce (leaves) { + if (leaves.length > 1) { + return { children: leaves } + } else { + return leaves[0] + } +} + +const options = { + maxChildrenPerNode: 3, + layerRepeat: 2 +} + +describe('builder: trickle', () => { + it('reduces one value into itself', async () => { + const result = await all(builder([1], reduce, options)) + + expect(result).to.deep.equal([1]) + }) + + it('reduces 3 values into parent', async () => { + const result = await all(builder(createValues(3), reduce, options)) + + expect(result).to.deep.equal([{ + children: [ + 0, + 1, + 2 + ] + }]) + }) + + it('reduces 6 values correctly', async () => { + const result = await all(builder(createValues(6), reduce, options)) + + expect(result).to.deep.equal([{ + children: [ + 0, + 1, + 2, + { + children: [ + 3, + 4, + 5 + ] + } + ] + }]) + }) + + it('reduces 9 values correctly', async () => { + const result = await all(builder(createValues(9), reduce, options)) + + expect(result).to.deep.equal([{ + children: [ + 0, + 1, + 2, + { + children: [ + 3, + 4, + 5 + ] + }, + { + children: [ + 6, + 7, + 8 + ] + } + ] + }]) + }) + + it('reduces 12 values correctly', async () => { + const result = await all(builder(createValues(12), reduce, options)) + + expect(result).to.deep.equal([{ + children: [ + 0, + 1, + 2, + { + children: [ + 3, + 4, + 5 + ] + }, + { + children: [ + 6, + 7, + 8 + ] + }, + { + children: [ + 9, + 10, + 11 + ] + } + ] + }]) + }) + + it('reduces 21 values correctly', async () => { + const result = await all(builder(createValues(21), reduce, options)) + + expect(result).to.deep.equal([{ + children: [ + 0, + 1, + 2, + { + children: [ + 3, + 4, + 5 + ] + }, + { + children: [ + 6, + 7, + 8 + ] + }, + { + children: [ + 9, + 10, + 11, + { + children: [ + 12, + 13, + 14 + ] + }, + { + children: [ + 15, + 16, + 17 + ] + } + ] + }, + { + children: [ + 18, + 19, + 20 + ] + } + ] + }]) + }) + + it('reduces 68 values correctly', async () => { + const result = await all(builder(createValues(68), reduce, options)) + + expect(result).to.deep.equal([ + { + children: [ + 0, + 1, + 2, + { + children: [ + 3, + 4, + 5 + ] + }, + { + children: [ + 6, + 7, + 8 + ] + }, + { + children: [ + 9, + 10, + 11, + { + children: [ + 12, + 13, + 14 + ] + }, + { + children: [ + 15, + 16, + 17 + ] + } + ] + }, + { + children: [ + 18, + 19, + 20, + { + children: [ + 21, + 22, + 23 + ] + }, + { + children: [ + 24, + 25, + 26 + ] + } + ] + }, + { + children: [ + 27, + 28, + 29, + { + children: [ + 30, + 31, + 32 + ] + }, + { + children: [ + 33, + 34, + 35 + ] + }, + { + children: [ + 36, + 37, + 38, + { + children: [ + 39, + 40, + 41 + ] + }, + { + children: [ + 42, + 43, + 44 + ] + } + ] + }, + { + children: [ + 45, + 46, + 47, + { + children: [ + 48, + 49, + 50 + ] + }, + { + children: [ + 51, + 52, + 53 + ] + } + ] + } + ] + }, 
+ { + children: [ + 54, + 55, + 56, + { + children: [ + 57, + 58, + 59 + ] + }, + { + children: [ + 60, + 61, + 62 + ] + }, + { + children: [ + 63, + 64, + 65, + { + children: [ + 66, + 67 + ] + } + ] + } + ] + } + ] + } + ]) + }) + + it('reduces 93 values correctly', async () => { + const result = await all(builder(createValues(93), reduce, options)) + + expect(result).to.deep.equal([ + { + children: [ + 0, + 1, + 2, + { + children: [ + 3, + 4, + 5 + ] + }, + { + children: [ + 6, + 7, + 8 + ] + }, + { + children: [ + 9, + 10, + 11, + { + children: [ + 12, + 13, + 14 + ] + }, + { + children: [ + 15, + 16, + 17 + ] + } + ] + }, + { + children: [ + 18, + 19, + 20, + { + children: [ + 21, + 22, + 23 + ] + }, + { + children: [ + 24, + 25, + 26 + ] + } + ] + }, + { + children: [ + 27, + 28, + 29, + { + children: [ + 30, + 31, + 32 + ] + }, + { + children: [ + 33, + 34, + 35 + ] + }, + { + children: [ + 36, + 37, + 38, + { + children: [ + 39, + 40, + 41 + ] + }, + { + children: [ + 42, + 43, + 44 + ] + } + ] + }, + { + children: [ + 45, + 46, + 47, + { + children: [ + 48, + 49, + 50 + ] + }, + { + children: [ + 51, + 52, + 53 + ] + } + ] + } + ] + }, + { + children: [ + 54, + 55, + 56, + { + children: [ + 57, + 58, + 59 + ] + }, + { + children: [ + 60, + 61, + 62 + ] + }, + { + children: [ + 63, + 64, + 65, + { + children: [ + 66, + 67, + 68 + ] + }, + { + children: [ + 69, + 70, + 71 + ] + } + ] + }, + { + children: [ + 72, + 73, + 74, + { + children: [ + 75, + 76, + 77 + ] + }, + { + children: [ + 78, + 79, + 80 + ] + } + ] + } + ] + }, + { + children: [ + 81, + 82, + 83, + { + children: [ + 84, + 85, + 86 + ] + }, + { + children: [ + 87, + 88, + 89 + ] + }, + { + children: [ + 90, + 91, + 92 + ] + } + ] + } + ] + } + ]) + }) +}) diff --git a/packages/ipfs-unixfs-importer/test/builder.spec.js b/packages/ipfs-unixfs-importer/test/builder.spec.js new file mode 100644 index 00000000..c9d5522d --- /dev/null +++ b/packages/ipfs-unixfs-importer/test/builder.spec.js @@ -0,0 +1,108 @@ +/* eslint-env mocha */ +'use strict' + +const chai = require('chai') +chai.use(require('dirty-chai')) +const expect = chai.expect +const mh = require('multihashing-async').multihash +const IPLD = require('ipld') +const inMemory = require('ipld-in-memory') +const UnixFS = require('ipfs-unixfs') +const builder = require('../src/dag-builder') +const first = require('it-first') + +describe('builder', () => { + let ipld + + before(async () => { + ipld = await inMemory(IPLD) + }) + + const testMultihashes = Object.keys(mh.names).slice(1, 40) + const opts = { + strategy: 'flat', + chunker: 'fixed', + leafType: 'file', + reduceSingleLeafToSelf: true, + format: 'dag-pb', + hashAlg: 'sha2-256', + progress: () => {}, + maxChunkSize: 262144 + } + + it('allows multihash hash algorithm to be specified', async () => { + for (let i = 0; i < testMultihashes.length; i++) { + const hashAlg = testMultihashes[i] + const options = { + ...opts, + hashAlg + } + const content = String(Math.random() + Date.now()) + const inputFile = { + path: content + '.txt', + content: Buffer.from(content) + } + + const imported = await (await first(builder([inputFile], ipld, options)))() + + expect(imported).to.exist() + + // Verify multihash has been encoded using hashAlg + expect(mh.decode(imported.cid.multihash).name).to.equal(hashAlg) + + // Fetch using hashAlg encoded multihash + const node = await ipld.get(imported.cid) + + const fetchedContent = UnixFS.unmarshal(node.Data).data + expect(fetchedContent).to.deep.equal(inputFile.content) + } + }) + + 
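+  // Note: the dag builder yields functions rather than import results - each
+  // one must be invoked to actually perform the import (hence the extra ()
+  // above). src/index.js relies on this to run file imports with bounded
+  // concurrency via it-parallel-batch and the fileImportConcurrency option.
+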
it('allows multihash hash algorithm to be specified for big file', async function () { + this.timeout(30000) + + for (let i = 0; i < testMultihashes.length; i++) { + const hashAlg = testMultihashes[i] + const options = { + ...opts, + hashAlg + } + const content = String(Math.random() + Date.now()) + const inputFile = { + path: content + '.txt', + // Bigger than maxChunkSize + content: Buffer.alloc(262144 + 5).fill(1) + } + + const imported = await (await first(builder([inputFile], ipld, options)))() + + expect(imported).to.exist() + expect(mh.decode(imported.cid.multihash).name).to.equal(hashAlg) + } + }) + + it('allows multihash hash algorithm to be specified for a directory', async () => { + for (let i = 0; i < testMultihashes.length; i++) { + const hashAlg = testMultihashes[i] + + const options = { + ...opts, + hashAlg + } + const inputFile = { + path: `${String(Math.random() + Date.now())}-dir`, + content: null + } + + const imported = await (await first(builder([Object.assign({}, inputFile)], ipld, options)))() + + expect(mh.decode(imported.cid.multihash).name).to.equal(hashAlg) + + // Fetch using hashAlg encoded multihash + const node = await ipld.get(imported.cid) + + const meta = UnixFS.unmarshal(node.Data) + expect(meta.type).to.equal('directory') + } + }) +}) diff --git a/packages/ipfs-unixfs-importer/test/chunker-custom.spec.js b/packages/ipfs-unixfs-importer/test/chunker-custom.spec.js new file mode 100644 index 00000000..8bc80dda --- /dev/null +++ b/packages/ipfs-unixfs-importer/test/chunker-custom.spec.js @@ -0,0 +1,73 @@ +/* eslint-env mocha */ +'use strict' + +const importer = require('../src') + +const chai = require('chai') +chai.use(require('dirty-chai')) +const expect = chai.expect +const IPLD = require('ipld') +const inMemory = require('ipld-in-memory') +const mc = require('multicodec') + +// eslint bug https://github.com/eslint/eslint/issues/12459 +// eslint-disable-next-line require-await +const iter = async function * () { + yield Buffer.from('one') + yield Buffer.from('two') +} + +describe('custom chunker', function () { + let inmem + + const fromPartsTest = (iter, size) => async () => { + for await (const part of importer([{ + content: iter() + }], inmem, { + chunkValidator: source => source, + chunker: source => source, + bufferImporter: async function * (file, source, ipld, options) { + for await (const item of source) { + yield () => Promise.resolve(item) + } + } + })) { + expect(part.size).to.equal(size) + } + } + + before(async () => { + inmem = await inMemory(IPLD) + }) + + it('keeps custom chunking', async () => { + const chunker = source => source + const content = iter() + for await (const part of importer([{ path: 'test', content }], inmem, { + chunker + })) { + expect(part.size).to.equal(116) + } + }) + + // eslint bug https://github.com/eslint/eslint/issues/12459 + const multi = async function * () { + yield { + size: 11, + cid: await inmem.put(Buffer.from('hello world'), mc.RAW) + } + yield { + size: 11, + cid: await inmem.put(Buffer.from('hello world'), mc.RAW) + } + } + it('works with multiple parts', fromPartsTest(multi, 120)) + + const single = async function * () { + yield { + size: 11, + cid: await inmem.put(Buffer.from('hello world'), mc.RAW) + } + } + it('works with single part', fromPartsTest(single, 19)) +}) diff --git a/packages/ipfs-unixfs-importer/test/chunker-fixed-size.spec.js b/packages/ipfs-unixfs-importer/test/chunker-fixed-size.spec.js new file mode 100644 index 00000000..276702ab --- /dev/null +++ 
b/packages/ipfs-unixfs-importer/test/chunker-fixed-size.spec.js @@ -0,0 +1,85 @@ +/* eslint-env mocha */ +'use strict' + +const chunker = require('../src/chunker/fixed-size') +const chai = require('chai') +chai.use(require('dirty-chai')) +const expect = chai.expect +const isNode = require('detect-node') +const all = require('it-all') +const loadFixture = require('aegir/fixtures') +const rawFile = loadFixture((isNode ? __dirname : 'test') + '/fixtures/1MiB.txt') + +describe('chunker: fixed size', function () { + this.timeout(30000) + + it('chunks non flat buffers', async () => { + const b1 = Buffer.alloc(2 * 256) + const b2 = Buffer.alloc(1 * 256) + const b3 = Buffer.alloc(5 * 256) + + b1.fill('a') + b2.fill('b') + b3.fill('c') + + const chunks = await all(chunker([b1, b2, b3], { + maxChunkSize: 256 + })) + + expect(chunks).to.have.length(8) + chunks.forEach((chunk) => { + expect(chunk).to.have.length(256) + }) + }) + + it('256 Bytes chunks', async () => { + const input = [] + const buf = Buffer.from('a') + + for (let i = 0; i < (256 * 12); i++) { + input.push(buf) + } + const chunks = await all(chunker(input, { + maxChunkSize: 256 + })) + + expect(chunks).to.have.length(12) + chunks.forEach((chunk) => { + expect(chunk).to.have.length(256) + }) + }) + + it('256 KiB chunks', async () => { + const KiB256 = 262144 + const chunks = await all(chunker([rawFile], { + maxChunkSize: KiB256 + })) + + expect(chunks).to.have.length(4) + chunks.forEach((chunk) => { + expect(chunk).to.have.length(KiB256) + }) + }) + + it('256 KiB chunks of non scalar filesize', async () => { + const KiB256 = 262144 + const file = Buffer.concat([rawFile, Buffer.from('hello')]) + + const chunks = await all(chunker([file], { + maxChunkSize: KiB256 + })) + + expect(chunks).to.have.length(5) + let counter = 0 + + chunks.forEach((chunk) => { + if (chunk.length < KiB256) { + counter++ + } else { + expect(chunk).to.have.length(KiB256) + } + }) + + expect(counter).to.equal(1) + }) +}) diff --git a/packages/ipfs-unixfs-importer/test/chunker-rabin.spec.js b/packages/ipfs-unixfs-importer/test/chunker-rabin.spec.js new file mode 100644 index 00000000..9f9a4aff --- /dev/null +++ b/packages/ipfs-unixfs-importer/test/chunker-rabin.spec.js @@ -0,0 +1,135 @@ +/* eslint-env mocha */ +'use strict' + +const chunker = require('../src/chunker/rabin') +const chai = require('chai') +chai.use(require('dirty-chai')) +const expect = chai.expect +const loadFixture = require('aegir/fixtures') +const isNode = require('detect-node') +const all = require('it-all') + +const rawFile = loadFixture((isNode ? 
__dirname : 'test') + '/fixtures/1MiB.txt') + +describe('chunker: rabin', function () { + this.timeout(30000) + + const defaultOptions = { + avgChunkSize: 262144, + window: 64, + polynomial: 17437180132763653 + } + + it('chunks non flat buffers', async () => { + const b1 = Buffer.alloc(2 * 256) + const b2 = Buffer.alloc(1 * 256) + const b3 = Buffer.alloc(5 * 256) + + b1.fill('a') + b2.fill('b') + b3.fill('c') + + const chunks = await all(chunker([b1, b2, b3], { + ...defaultOptions, + minChunkSize: 48, + avgChunkSize: 96, + maxChunkSize: 192 + })) + + const size = chunks.reduce((acc, curr) => acc + curr.length, 0) + + expect(size).to.equal(b1.length + b2.length + b3.length) + + chunks.forEach((chunk, index) => { + if (index === chunks.length - 1) { + expect(chunk.length).to.equal(128) + } else { + expect(chunk.length).to.equal(192) + } + }) + }) + + it('uses default min and max chunk size when only avgChunkSize is specified', async () => { + const b1 = Buffer.alloc(10 * 256) + b1.fill('a') + + const chunks = await all(chunker([b1], { + ...defaultOptions, + maxChunkSize: 262144, + minChunkSize: 18, + avgChunkSize: 256 + })) + + chunks.forEach((chunk) => { + expect(chunk).to.have.length.gte(256 / 3) + expect(chunk).to.have.length.lte(256 * (256 / 2)) + }) + }) + + it('256 KiB avg chunks of non scalar filesize', async () => { + const KiB256 = 262144 + const file = Buffer.concat([rawFile, Buffer.from('hello')]) + const opts = { + ...defaultOptions, + minChunkSize: KiB256 / 3, + avgChunkSize: KiB256, + maxChunkSize: KiB256 + (KiB256 / 2) + } + + const chunks = await all(chunker([file], opts)) + + chunks.forEach((chunk) => { + expect(chunk).to.have.length.gte(opts.minChunkSize) + expect(chunk).to.have.length.lte(opts.maxChunkSize) + }) + }) + + it('throws when min chunk size is too small', async () => { + const opts = { + ...defaultOptions, + minChunkSize: 1, + maxChunkSize: 100 + } + + try { + await all(chunker([], opts)) + throw new Error('Should have thrown') + } catch (err) { + expect(err.code).to.equal('ERR_INVALID_MIN_CHUNK_SIZE') + } + }) + + it('throws when avg chunk size is not specified', async () => { + const opts = { + ...defaultOptions, + avgChunkSize: undefined + } + + try { + await all(chunker([], opts)) + throw new Error('Should have thrown') + } catch (err) { + expect(err.code).to.equal('ERR_INVALID_AVG_CHUNK_SIZE') + } + }) + + it('uses the min chunk size when max and avg are too small', async () => { + const file = Buffer.concat([rawFile, Buffer.from('hello')]) + const opts = { + ...defaultOptions, + minChunkSize: 100, + maxChunkSize: 5, + avgChunkSize: 5 + } + + const chunks = await all(chunker([file], opts)) + + chunks.forEach((chunk, index) => { + if (index === chunks.length - 1) { + expect(chunk.length).to.equal(81) + } else { + expect(chunk.length).to.equal(100) + } + }) + }) +}) diff --git a/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt b/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt new file mode 100644 index 00000000..6e306c55 Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt differ diff --git a/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.block b/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.block new file mode 100644 index 00000000..f57749f0 Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.block differ diff --git a/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.link-block0 b/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.link-block0 new file mode 
100644 index 00000000..a6e00f34 Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.link-block0 differ diff --git a/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.link-block1 b/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.link-block1 new file mode 100644 index 00000000..f4c039c2 Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.link-block1 differ diff --git a/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.link-block2 b/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.link-block2 new file mode 100644 index 00000000..64ce0aeb Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.link-block2 differ diff --git a/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.link-block3 b/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.link-block3 new file mode 100644 index 00000000..c1f9899a Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.link-block3 differ diff --git a/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.link-block4 b/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.link-block4 new file mode 100644 index 00000000..cbd601a6 Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.link-block4 differ diff --git a/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.unixfs-file b/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.unixfs-file new file mode 100644 index 00000000..e7229e0e --- /dev/null +++ b/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.unixfs-file @@ -0,0 +1 @@ +L     \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.unixfs-raw0 b/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.unixfs-raw0 new file mode 100644 index 00000000..36ff3333 Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.unixfs-raw0 differ diff --git a/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.unixfs-raw1 b/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.unixfs-raw1 new file mode 100644 index 00000000..fa626274 Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.unixfs-raw1 differ diff --git a/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.unixfs-raw2 b/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.unixfs-raw2 new file mode 100644 index 00000000..f7ea5c2e Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.unixfs-raw2 differ diff --git a/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.unixfs-raw3 b/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.unixfs-raw3 new file mode 100644 index 00000000..de99ffe5 Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.unixfs-raw3 differ diff --git a/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.unixfs-raw4 b/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.unixfs-raw4 new file mode 100644 index 00000000..0e438a15 Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.unixfs-raw4 differ diff --git a/packages/ipfs-unixfs-importer/test/fixtures/1MiB.txt b/packages/ipfs-unixfs-importer/test/fixtures/1MiB.txt new file mode 100644 index 00000000..60770c23 Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/fixtures/1MiB.txt differ diff --git a/packages/ipfs-unixfs-importer/test/fixtures/200Bytes.txt b/packages/ipfs-unixfs-importer/test/fixtures/200Bytes.txt new file mode 100644 
index 00000000..d95023c7 --- /dev/null +++ b/packages/ipfs-unixfs-importer/test/fixtures/200Bytes.txt @@ -0,0 +1,4 @@ +wxxM{ +DzH/&^ RS/v,R +=Ng~pf1\[>%U1@Q׀2&m6qQ؁]|!K E~J ֕읝ojbn3eT)D+;s +컓:Ty!c3\*T7E?[Pv}A+cx~e \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/fixtures/200Bytes.txt.block b/packages/ipfs-unixfs-importer/test/fixtures/200Bytes.txt.block new file mode 100644 index 00000000..a655cf83 --- /dev/null +++ b/packages/ipfs-unixfs-importer/test/fixtures/200Bytes.txt.block @@ -0,0 +1,5 @@ + +wxxM{ +DzH/&^ RS/v,R +=Ng~pf1\[>%U1@Q׀2&m6qQ؁]|!K E~J ֕읝ojbn3eT)D+;s +컓:Ty!c3\*T7E?[Pv}A+cx~e \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/fixtures/200Bytes.txt.unixfs-file b/packages/ipfs-unixfs-importer/test/fixtures/200Bytes.txt.unixfs-file new file mode 100644 index 00000000..b93a6da8 --- /dev/null +++ b/packages/ipfs-unixfs-importer/test/fixtures/200Bytes.txt.unixfs-file @@ -0,0 +1,4 @@ +wxxM{ +DzH/&^ RS/v,R +=Ng~pf1\[>%U1@Q׀2&m6qQ؁]|!K E~J ֕읝ojbn3eT)D+;s +컓:Ty!c3\*T7E?[Pv}A+cx~e \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/fixtures/dir-big.block b/packages/ipfs-unixfs-importer/test/fixtures/dir-big.block new file mode 100644 index 00000000..ce734230 --- /dev/null +++ b/packages/ipfs-unixfs-importer/test/fixtures/dir-big.block @@ -0,0 +1,4 @@ +4 +" si"¹W%U1@Q׀2&m6qQ؁]|!K E~J ֕읝ojbn3eT)D+;s +컓:Ty!c3\*T7E?[Pv}A+cx~e \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/fixtures/dir-nested/level-1/200Bytes.txt b/packages/ipfs-unixfs-importer/test/fixtures/dir-nested/level-1/200Bytes.txt new file mode 100644 index 00000000..d95023c7 --- /dev/null +++ b/packages/ipfs-unixfs-importer/test/fixtures/dir-nested/level-1/200Bytes.txt @@ -0,0 +1,4 @@ +wxxM{ +DzH/&^ RS/v,R +=Ng~pf1\[>%U1@Q׀2&m6qQ؁]|!K E~J ֕읝ojbn3eT)D+;s +컓:Ty!c3\*T7E?[Pv}A+cx~e \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/fixtures/dir-small.block b/packages/ipfs-unixfs-importer/test/fixtures/dir-small.block new file mode 100644 index 00000000..5accb645 --- /dev/null +++ b/packages/ipfs-unixfs-importer/test/fixtures/dir-small.block @@ -0,0 +1,3 @@ +5 +" $G,A4{xZ/.D` 200Bytes.txt + \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/fixtures/dir-small.unixfs-dir b/packages/ipfs-unixfs-importer/test/fixtures/dir-small.unixfs-dir new file mode 100644 index 00000000..e19a122a --- /dev/null +++ b/packages/ipfs-unixfs-importer/test/fixtures/dir-small.unixfs-dir @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/fixtures/dir-small/200Bytes.txt b/packages/ipfs-unixfs-importer/test/fixtures/dir-small/200Bytes.txt new file mode 100644 index 00000000..d95023c7 --- /dev/null +++ b/packages/ipfs-unixfs-importer/test/fixtures/dir-small/200Bytes.txt @@ -0,0 +1,4 @@ +wxxM{ +DzH/&^ RS/v,R +=Ng~pf1\[>%U1@Q׀2&m6qQ؁]|!K E~J ֕읝ojbn3eT)D+;s +컓:Ty!c3\*T7E?[Pv}A+cx~e \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/fixtures/dir-with-empty-files/empty-file.txt b/packages/ipfs-unixfs-importer/test/fixtures/dir-with-empty-files/empty-file.txt new file mode 100644 index 00000000..e69de29b diff --git a/packages/ipfs-unixfs-importer/test/fixtures/empty.txt b/packages/ipfs-unixfs-importer/test/fixtures/empty.txt new file mode 100644 index 00000000..e69de29b diff --git a/packages/ipfs-unixfs-importer/test/fixtures/foo-big/1.2MiB.txt b/packages/ipfs-unixfs-importer/test/fixtures/foo-big/1.2MiB.txt new file mode 100644 index 
00000000..6e306c55 Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/fixtures/foo-big/1.2MiB.txt differ diff --git a/packages/ipfs-unixfs-importer/test/fixtures/foo/bar/200Bytes.txt b/packages/ipfs-unixfs-importer/test/fixtures/foo/bar/200Bytes.txt new file mode 100644 index 00000000..d95023c7 --- /dev/null +++ b/packages/ipfs-unixfs-importer/test/fixtures/foo/bar/200Bytes.txt @@ -0,0 +1,4 @@ +wxxM{ +DzH/&^ RS/v,R +=Ng~pf1\[>%U1@Q׀2&m6qQ؁]|!K E~J ֕읝ojbn3eT)D+;s +컓:Ty!c3\*T7E?[Pv}A+cx~e \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/fixtures/ipfsmarket-1.ogv b/packages/ipfs-unixfs-importer/test/fixtures/ipfsmarket-1.ogv new file mode 100644 index 00000000..55e83f48 Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/fixtures/ipfsmarket-1.ogv differ diff --git a/packages/ipfs-unixfs-importer/test/fixtures/pam/1.2MiB.txt b/packages/ipfs-unixfs-importer/test/fixtures/pam/1.2MiB.txt new file mode 100644 index 00000000..6e306c55 Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/fixtures/pam/1.2MiB.txt differ diff --git a/packages/ipfs-unixfs-importer/test/fixtures/pam/pum/1.2MiB.txt b/packages/ipfs-unixfs-importer/test/fixtures/pam/pum/1.2MiB.txt new file mode 100644 index 00000000..6e306c55 Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/fixtures/pam/pum/1.2MiB.txt differ diff --git a/packages/ipfs-unixfs-importer/test/fixtures/pam/pum/200Bytes.txt b/packages/ipfs-unixfs-importer/test/fixtures/pam/pum/200Bytes.txt new file mode 100644 index 00000000..d95023c7 --- /dev/null +++ b/packages/ipfs-unixfs-importer/test/fixtures/pam/pum/200Bytes.txt @@ -0,0 +1,4 @@ +wxxM{ +DzH/&^ RS/v,R +=Ng~pf1\[>%U1@Q׀2&m6qQ؁]|!K E~J ֕읝ojbn3eT)D+;s +컓:Ty!c3\*T7E?[Pv}A+cx~e \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/fixtures/pim/1.2MiB.txt b/packages/ipfs-unixfs-importer/test/fixtures/pim/1.2MiB.txt new file mode 100644 index 00000000..6e306c55 Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/fixtures/pim/1.2MiB.txt differ diff --git a/packages/ipfs-unixfs-importer/test/fixtures/pim/200Bytes.txt b/packages/ipfs-unixfs-importer/test/fixtures/pim/200Bytes.txt new file mode 100644 index 00000000..d95023c7 --- /dev/null +++ b/packages/ipfs-unixfs-importer/test/fixtures/pim/200Bytes.txt @@ -0,0 +1,4 @@ +wxxM{ +DzH/&^ RS/v,R +=Ng~pf1\[>%U1@Q׀2&m6qQ؁]|!K E~J ֕읝ojbn3eT)D+;s +컓:Ty!c3\*T7E?[Pv}A+cx~e \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/fixtures/small.txt b/packages/ipfs-unixfs-importer/test/fixtures/small.txt new file mode 100644 index 00000000..f81fce04 --- /dev/null +++ b/packages/ipfs-unixfs-importer/test/fixtures/small.txt @@ -0,0 +1 @@ +this is a file diff --git a/packages/ipfs-unixfs-importer/test/fixtures/test-file.txt b/packages/ipfs-unixfs-importer/test/fixtures/test-file.txt new file mode 100644 index 00000000..b3ab23d1 --- /dev/null +++ b/packages/ipfs-unixfs-importer/test/fixtures/test-file.txt @@ -0,0 +1,20361 @@ +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). 
Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. 
+Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + + diff --git a/packages/ipfs-unixfs-importer/test/fixtures/test-file.txt.link-block0 b/packages/ipfs-unixfs-importer/test/fixtures/test-file.txt.link-block0 new file mode 100644 index 00000000..f9810363 --- /dev/null +++ b/packages/ipfs-unixfs-importer/test/fixtures/test-file.txt.link-block0 @@ -0,0 +1,4728 @@ + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. 
Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. 
+ + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. 
Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. 
+Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. +There have been many attempts at constructing a global +distributed file system. 
Some \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/fixtures/test-video.ogv b/packages/ipfs-unixfs-importer/test/fixtures/test-video.ogv new file mode 100644 index 00000000..55e83f48 Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/fixtures/test-video.ogv differ diff --git a/packages/ipfs-unixfs-importer/test/hash-parity-with-go-ipfs.spec.js b/packages/ipfs-unixfs-importer/test/hash-parity-with-go-ipfs.spec.js new file mode 100644 index 00000000..94e44fda --- /dev/null +++ b/packages/ipfs-unixfs-importer/test/hash-parity-with-go-ipfs.spec.js @@ -0,0 +1,51 @@ +/* eslint-env mocha */ +'use strict' + +const importer = require('../src') + +const chai = require('chai') +chai.use(require('dirty-chai')) +const expect = chai.expect +const IPLD = require('ipld') +const inMemory = require('ipld-in-memory') +const randomByteStream = require('./helpers/finite-pseudorandom-byte-stream') +const first = require('it-first') + +const strategies = [ + 'flat', + 'trickle', + 'balanced' +] + +const expectedHashes = { + flat: 'QmeJ9FRWKnXZQiX5CM1E8j4gpGbg6otpgajThqsbnBpoyD', + balanced: 'QmRdPboiJQoZ5cdazR9a8vGqdJvWg6M5bfdtUSKNHpuscj', + trickle: 'QmdZcefqMZ3tzdS4CRBN5s1c67eS3nQzN8TNXFBYfgofoy' +} + +strategies.forEach(strategy => { + const options = { + strategy: strategy + } + + describe('go-ipfs interop using importer:' + strategy, () => { + let ipld + + before(async () => { + ipld = await inMemory(IPLD) + }) + + it('yields the same tree as go-ipfs', async function () { + this.timeout(100 * 1000) + + const source = [{ + path: 'big.dat', + content: randomByteStream(45900000, 7382) + }] + + const file = await first(importer(source, ipld, options)) + + expect(file.cid.toBaseEncodedString()).to.be.equal(expectedHashes[strategy]) + }) + }) +}) diff --git a/packages/ipfs-unixfs-importer/test/helpers/collect-leaf-cids.js b/packages/ipfs-unixfs-importer/test/helpers/collect-leaf-cids.js new file mode 100644 index 00000000..4ef6a4e9 --- /dev/null +++ b/packages/ipfs-unixfs-importer/test/helpers/collect-leaf-cids.js @@ -0,0 +1,20 @@ +'use strict' + +module.exports = function (cid, ipld) { + async function * traverse (cid) { + const node = await ipld.get(cid) + + if (Buffer.isBuffer(node) || !node.Links.length) { + yield { + node, + cid + } + + return + } + + node.Links.forEach(link => traverse(link.Hash)) + } + + return traverse(cid) +} diff --git a/packages/ipfs-unixfs-importer/test/helpers/finite-pseudorandom-byte-stream.js b/packages/ipfs-unixfs-importer/test/helpers/finite-pseudorandom-byte-stream.js new file mode 100644 index 00000000..3b07c734 --- /dev/null +++ b/packages/ipfs-unixfs-importer/test/helpers/finite-pseudorandom-byte-stream.js @@ -0,0 +1,24 @@ +'use strict' + +const REPEATABLE_CHUNK_SIZE = 300000 + +module.exports = function * (maxSize, seed) { + const chunks = Math.ceil(maxSize / REPEATABLE_CHUNK_SIZE) + let emitted = 0 + const buf = Buffer.alloc(REPEATABLE_CHUNK_SIZE) + + while (emitted !== chunks) { + for (let i = 0; i < buf.length; i++) { + buf[i] = 256 & Math.floor(random(seed) * 256) + } + + yield buf + + emitted++ + } +} + +function random (seed) { + const x = Math.sin(seed) * 10000 + return x - Math.floor(x) +} diff --git a/packages/ipfs-unixfs-importer/test/helpers/random-byte-stream.js b/packages/ipfs-unixfs-importer/test/helpers/random-byte-stream.js new file mode 100644 index 00000000..776ae90f --- /dev/null +++ b/packages/ipfs-unixfs-importer/test/helpers/random-byte-stream.js @@ -0,0 +1,15 @@ +'use strict' + +module.exports = function * 
randomByteStream (seed) { + while (true) { + const r = Math.floor(random(seed) * 256) + seed = r + + yield Buffer.from([r]) + } +} + +function random (seed) { + const x = Math.sin(seed) * 10000 + return x - Math.floor(x) +} diff --git a/packages/ipfs-unixfs-importer/test/helpers/stream-to-array.js b/packages/ipfs-unixfs-importer/test/helpers/stream-to-array.js new file mode 100644 index 00000000..e69de29b diff --git a/packages/ipfs-unixfs-importer/test/import-export-nested-dir.spec.js b/packages/ipfs-unixfs-importer/test/import-export-nested-dir.spec.js new file mode 100644 index 00000000..ae607121 --- /dev/null +++ b/packages/ipfs-unixfs-importer/test/import-export-nested-dir.spec.js @@ -0,0 +1,115 @@ +/* eslint-env mocha */ +'use strict' + +const chai = require('chai') +chai.use(require('dirty-chai')) +const expect = chai.expect +const IPLD = require('ipld') +const inMemory = require('ipld-in-memory') +const all = require('it-all') +const importer = require('../src') +const exporter = require('ipfs-unixfs-exporter') + +describe('import and export: directory', () => { + const rootHash = 'QmdCrquDwd7RfZ6GCZFEVADwe8uyyw1YmF9mtAB7etDgmK' + let ipld + + before(async () => { + ipld = await inMemory(IPLD) + }) + + it('imports', async function () { + this.timeout(20 * 1000) + + const source = [{ + path: 'a/b/c/d/e', + content: Buffer.from('banana') + }, { + path: 'a/b/c/d/f', + content: Buffer.from('strawberry') + }, { + path: 'a/b/g', + content: Buffer.from('ice') + }, { + path: 'a/b/h', + content: Buffer.from('cream') + }] + + const files = await all(importer(source, ipld)) + + expect(files.map(normalizeNode).sort(byPath)).to.be.eql([{ + path: 'a/b/h', + multihash: 'QmWHMpCtdNjemT2F3SjyrmnBXQXwEohaZd4apcbFBhbFRC' + }, { + path: 'a/b/g', + multihash: 'QmQGwYzzTPcbqTiy2Nbp88gqqBqCWY4QZGfen45LFZkD5n' + }, { + path: 'a/b/c/d/f', + multihash: 'QmNVHs2dy7AjGUotsubWVncRsD3SpRXm8MgmCCQTVdVACz' + }, { + path: 'a/b/c/d/e', + multihash: 'QmYPbDKwc7oneCcEc6BcRSN5GXthTGWUCd19bTCyP9u3vH' + }, { + path: 'a/b/c/d', + multihash: 'QmQGDXr3ysARM38n7h79Tx7yD3YxuzcnZ1naG71WMojPoj' + }, { + path: 'a/b/c', + multihash: 'QmYTVcjYpN3hQLtJstCPE8hhEacAYjWAuTmmAAXoonamuE' + }, { + path: 'a/b', + multihash: 'QmWyWYxq1GD9fEyckf5LrJv8hMW35CwfWwzDBp8bTw3NQj' + }, { + path: 'a', + multihash: rootHash + }]) + }) + + it('exports', async function () { + this.timeout(20 * 1000) + + const dir = await exporter(rootHash, ipld) + const files = await recursiveExport(dir, rootHash) + + expect(files.sort(byPath)).to.eql([{ + path: 'QmdCrquDwd7RfZ6GCZFEVADwe8uyyw1YmF9mtAB7etDgmK/b/h', + content: 'cream' + }, { + path: 'QmdCrquDwd7RfZ6GCZFEVADwe8uyyw1YmF9mtAB7etDgmK/b/g', + content: 'ice' + }, { + path: 'QmdCrquDwd7RfZ6GCZFEVADwe8uyyw1YmF9mtAB7etDgmK/b/c/d/f', + content: 'strawberry' + }, { + path: 'QmdCrquDwd7RfZ6GCZFEVADwe8uyyw1YmF9mtAB7etDgmK/b/c/d/e', + content: 'banana' + }]) + }) +}) + +async function recursiveExport (node, path, entries = []) { + for await (const entry of node.content()) { + if (entry.unixfs.type === 'directory') { + await recursiveExport(entry, `${path}/${entry.name}`, entries) + } else { + entries.push({ + path: `${path}/${entry.name}`, + content: Buffer.concat(await all(entry.content())).toString() + }) + } + } + + return entries +} + +function normalizeNode (node) { + return { + path: node.path, + multihash: node.cid.toBaseEncodedString() + } +} + +function byPath (a, b) { + if (a.path > b.path) return -1 + if (a.path < b.path) return 1 + return 0 +} diff --git 
a/packages/ipfs-unixfs-importer/test/import-export.spec.js b/packages/ipfs-unixfs-importer/test/import-export.spec.js new file mode 100644 index 00000000..16a91eae --- /dev/null +++ b/packages/ipfs-unixfs-importer/test/import-export.spec.js @@ -0,0 +1,50 @@ +/* eslint-env mocha */ +/* eslint max-nested-callbacks: ["error", 5] */ +'use strict' + +const chai = require('chai') +chai.use(require('dirty-chai')) +const expect = chai.expect +const IPLD = require('ipld') +const inMemory = require('ipld-in-memory') +const loadFixture = require('aegir/fixtures') +const isNode = require('detect-node') +const bigFile = loadFixture((isNode ? __dirname : 'test') + '/fixtures/1.2MiB.txt') + +const importer = require('../src') +const exporter = require('ipfs-unixfs-exporter') + +const strategies = [ + 'flat', + 'balanced', + 'trickle' +] + +describe('import and export', function () { + this.timeout(30 * 1000) + + strategies.forEach((strategy) => { + const importerOptions = { strategy: strategy } + + describe('using builder: ' + strategy, () => { + let ipld + + before(async () => { + ipld = await inMemory(IPLD) + }) + + it('imports and exports', async () => { + const path = `${strategy}-big.dat` + const values = [{ path: path, content: bigFile }] + + for await (const file of importer(values, ipld, importerOptions)) { + expect(file.path).to.eql(path) + + const result = await exporter(file.cid, ipld) + + expect(result.unixfs.fileSize()).to.eql(bigFile.length) + } + }) + }) + }) +}) diff --git a/packages/ipfs-unixfs-importer/test/importer.spec.js b/packages/ipfs-unixfs-importer/test/importer.spec.js new file mode 100644 index 00000000..0f5b6589 --- /dev/null +++ b/packages/ipfs-unixfs-importer/test/importer.spec.js @@ -0,0 +1,987 @@ +/* eslint-env mocha */ +'use strict' + +const importer = require('../src') +const exporter = require('ipfs-unixfs-exporter') + +const extend = require('deep-extend') +const chai = require('chai') +chai.use(require('dirty-chai')) +const expect = chai.expect +const spy = require('sinon/lib/sinon/spy') +const IPLD = require('ipld') +const inMemory = require('ipld-in-memory') +const UnixFs = require('ipfs-unixfs') +const collectLeafCids = require('./helpers/collect-leaf-cids') +const loadFixture = require('aegir/fixtures') +const isNode = require('detect-node') +const bigFile = loadFixture((isNode ? __dirname : 'test') + '/fixtures/1.2MiB.txt') +const smallFile = loadFixture((isNode ? 
__dirname : 'test') + '/fixtures/200Bytes.txt') +const all = require('it-all') +const first = require('it-first') + +function stringifyMh (files) { + return files.map((file) => { + return { + ...file, + cid: file.cid.toBaseEncodedString() + } + }) +} + +function dateToTimespec (date) { + const ms = date.getTime() + const secs = Math.floor(ms / 1000) + + return { + secs, + nsecs: (ms - (secs * 1000)) * 1000 + } +} + +const baseFiles = { + '200Bytes.txt': { + cid: 'QmQmZQxSKQppbsWfVzBvg59Cn3DKtsNVQ94bjAxg2h3Lb8', + size: 200, + type: 'file', + path: '200Bytes.txt' + }, + '1.2MiB.txt': { + cid: 'QmW7BDxEbGqxxSYVtn3peNPQgdDXbWkoQ6J1EFYAEuQV3Q', + size: 1258000, + type: 'file', + path: '1.2MiB.txt' + }, + 'small.txt': { + cid: 'QmZMb7HWpbevpcdhbUV1ZZgdji8vh5uQ13KxczChGrK9Rd', + size: 15, + type: 'file', + path: 'small.txt' + } +} + +const strategyBaseFiles = { + flat: baseFiles, + balanced: extend({}, baseFiles, { + '1.2MiB.txt': { + cid: 'QmW7BDxEbGqxxSYVtn3peNPQgdDXbWkoQ6J1EFYAEuQV3Q', + type: 'file' + } + }), + trickle: extend({}, baseFiles, { + '200Bytes.txt': { + cid: 'QmY8bwnoKAKvJ8qtyPhWNxSS6sxiGVTJ9VpdQffs2KB5pE', + size: 200, + type: 'file', + path: '200Bytes.txt' + }, + '1.2MiB.txt': { + cid: 'QmfAxsHrpaLLuhbqqbo9KQyvQNawMnVSwutYoJed75pnco', + type: 'file' + } + }) +} + +const strategies = [ + 'flat', + 'balanced', + 'trickle' +] + +const strategyOverrides = { + balanced: { + 'foo-big': { + cid: 'QmaFgyFJUP4fxFySJCddg2Pj6rpwSywopWk87VEVv52RSj', + path: 'foo-big', + size: 1335478, + type: 'directory' + }, + pim: { + cid: 'QmY8a78tx6Tk6naDgWCgTsd9EqGrUJRrH7dDyQhjyrmH2i', + path: 'pim', + size: 1335744, + type: 'directory' + }, + 'pam/pum': { + cid: 'QmY8a78tx6Tk6naDgWCgTsd9EqGrUJRrH7dDyQhjyrmH2i', + path: 'pam/pum', + size: 1335744, + type: 'directory' + }, + pam: { + cid: 'QmRgdtzNx1H1BPJqShdhvWZ2D4DA2HUgZJ3XLtoXei27Av', + path: 'pam', + size: 2671269, + type: 'directory' + } + }, + trickle: { + 'foo-big': { + cid: 'QmaKbhFRy9kcCbcwrLsqYHWMiY44BDYkqTCMpAxDdd2du2', + path: 'foo-big', + size: 1334657, + type: 'directory' + }, + pim: { + cid: 'QmbWGdnua4YuYpWJb7fE25PRbW9GbKKLqq9Ucmnsg2gxnt', + path: 'pim', + size: 1334923, + type: 'directory' + }, + 'pam/pum': { + cid: 'QmbWGdnua4YuYpWJb7fE25PRbW9GbKKLqq9Ucmnsg2gxnt', + path: 'pam/pum', + size: 1334923, + type: 'directory' + }, + pam: { + cid: 'QmSuh47G9Qm3PFv1zziojtHxqCjuurSdtWAzxLxoKJPq2U', + path: 'pam', + size: 2669627, + type: 'directory' + }, + '200Bytes.txt with raw leaves': { + cid: 'QmagyRwMfYhczYNv5SvcJc8xxXjZQBTTHS2jEqNMva2mYT', + size: 200, + path: '200Bytes.txt', + type: 'file' + }, + 'foo/bar': { + cid: 'QmTGMxKPzSGNBDp6jhTwnZxGW6w1S9ciyycRJ4b2qcQaHK', + size: 0, + path: 'foo/bar', + type: 'directory' + }, + foo: { + cid: 'Qme4A8fZmwfZESappfPcxSMTZVACiEzhHKtYRMuM1hbkDp', + size: 0, + path: 'foo', + type: 'directory' + }, + 'small.txt': { + cid: 'QmXmZ3qT328JxWtQXqrmvma2FmPp7tMdNiSuYvVJ5QRhKs', + size: 15, + type: 'file', + path: 'small.txt' + } + } +} + +const checkLeafNodeTypes = async (ipld, options, expected) => { + const file = await first(importer([{ + path: 'foo', + content: Buffer.alloc(262144 + 5).fill(1) + }], ipld, options)) + + const node = await ipld.get(file.cid) + const meta = UnixFs.unmarshal(node.Data) + + expect(meta.type).to.equal('file') + expect(node.Links.length).to.equal(2) + + const linkedNodes = await Promise.all( + node.Links.map(link => ipld.get(link.Hash)) + ) + + linkedNodes.forEach(node => { + const meta = UnixFs.unmarshal(node.Data) + expect(meta.type).to.equal(expected) + }) +} + +const 
checkNodeLinks = async (ipld, options, expected) => { + for await (const file of importer([{ + path: 'foo', + content: Buffer.alloc(100).fill(1) + }], ipld, options)) { + const node = await ipld.get(file.cid) + const meta = UnixFs.unmarshal(node.Data) + + expect(meta.type).to.equal('file') + expect(node.Links.length).to.equal(expected) + } +} + +strategies.forEach((strategy) => { + const baseFiles = strategyBaseFiles[strategy] + const defaultResults = extend({}, baseFiles, { + 'foo/bar/200Bytes.txt': extend({}, baseFiles['200Bytes.txt'], { + path: 'foo/bar/200Bytes.txt' + }), + foo: { + path: 'foo', + cid: 'QmQrb6KKWGo8w7zKfx2JksptY6wN7B2ysSBdKZr4xMU36d', + size: 320, + type: 'directory' + }, + 'foo/bar': { + path: 'foo/bar', + cid: 'Qmf5BQbTUyUAvd6Ewct83GYGnE1F6btiC3acLhR8MDxgkD', + size: 270, + type: 'directory' + }, + 'foo-big/1.2MiB.txt': extend({}, baseFiles['1.2MiB.txt'], { + path: 'foo-big/1.2MiB.txt' + }), + 'foo-big': { + path: 'foo-big', + cid: 'QmaFgyFJUP4fxFySJCddg2Pj6rpwSywopWk87VEVv52RSj', + size: 1328120, + type: 'directory' + }, + 'pim/200Bytes.txt': extend({}, baseFiles['200Bytes.txt'], { + path: 'pim/200Bytes.txt' + }), + 'pim/1.2MiB.txt': extend({}, baseFiles['1.2MiB.txt'], { + path: 'pim/1.2MiB.txt' + }), + pim: { + path: 'pim', + cid: 'QmY8a78tx6Tk6naDgWCgTsd9EqGrUJRrH7dDyQhjyrmH2i', + size: 1328386, + type: 'directory' + }, + 'empty-dir': { + path: 'empty-dir', + cid: 'QmUNLLsPACCz1vLxQVkXqqLX5R1X345qqfHbsf67hvA3Nn', + size: 4, + type: 'directory' + }, + 'pam/pum': { + cid: 'QmY8a78tx6Tk6naDgWCgTsd9EqGrUJRrH7dDyQhjyrmH2i', + path: 'pam/pum', + size: 1328386, + type: 'directory' + }, + pam: { + cid: 'QmRgdtzNx1H1BPJqShdhvWZ2D4DA2HUgZJ3XLtoXei27Av', + path: 'pam', + size: 2656553, + type: 'directory' + }, + '200Bytes.txt with raw leaves': extend({}, baseFiles['200Bytes.txt'], { + cid: 'QmQmZQxSKQppbsWfVzBvg59Cn3DKtsNVQ94bjAxg2h3Lb8', + size: 200 + }) + }, strategyOverrides[strategy]) + + const expected = extend({}, defaultResults, strategies[strategy]) + + const expectFiles = (actualFiles, expectedFiles) => { + expect(actualFiles.length).to.equal(expectedFiles.length) + + for (let i = 0; i < expectedFiles.length; i++) { + const expectedFile = expected[expectedFiles[i]] + const actualFile = actualFiles[i] + + expect(actualFile.path).to.equal(expectedFile.path) + expect(actualFile.cid.toBaseEncodedString('base58btc')).to.equal(expectedFile.cid) + + if (actualFile.unixfs) { + expect(actualFile.unixfs.type).to.equal(expectedFile.type) + + if (actualFile.unixfs.type === 'file') { + expect(actualFile.unixfs.fileSize()).to.equal(expectedFile.size) + } + } + } + } + + describe('importer: ' + strategy, function () { + this.timeout(30 * 1000) + + let ipld + const options = { + strategy: strategy + } + + before(async () => { + ipld = await inMemory(IPLD) + }) + + it('fails on bad content', async () => { + try { + await all(importer([{ + path: '200Bytes.txt', + content: 7 + }], ipld, options)) + throw new Error('No error was thrown') + } catch (err) { + expect(err.code).to.equal('ERR_INVALID_CONTENT') + } + }) + + it('fails on an iterator that yields bad content', async () => { + try { + await all(importer([{ + path: '200Bytes.txt', + content: { + [Symbol.iterator]: function * () { + yield 7 + } + } + }], ipld, options)) + throw new Error('No error was thrown') + } catch (err) { + expect(err.code).to.equal('ERR_INVALID_CONTENT') + } + }) + + it('doesn\'t yield anything on empty source', async () => { + const files = await all(importer([], ipld, options)) + + 
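+      // an empty source should yield no importer entries at all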
expect(files).to.be.empty() + }) + + it('doesn\'t yield anything on empty file', async () => { + const files = await all(importer([{ + path: 'emptyfile', + content: Buffer.alloc(0) + }], ipld, options)) + + expect(files.length).to.eql(1) + + // always yield empty file node + expect(files[0].cid.toBaseEncodedString()).to.eql('QmbFMke1KXqnYyBBWxB74N4c5SBnJMVAiMNRcGu6x1AwQH') + }) + + it('fails on more than one root', async () => { + try { + await all(importer([{ + path: 'beep/200Bytes.txt', + content: smallFile + }, { + path: 'boop/200Bytes.txt', + content: bigFile + }], ipld, options)) + + throw new Error('No error was thrown') + } catch (err) { + expect(err.code).to.equal('ERR_MORE_THAN_ONE_ROOT') + } + }) + + it('accepts strings as content', async () => { + const content = 'I am a string' + const res = await all(importer([{ + path: '200Bytes.txt', + content + }], ipld, options)) + + const file = await exporter(res[0].cid, ipld) + const fileContent = await all(file.content()) + + expect(fileContent.toString()).to.equal(content) + }) + + it('small file with an escaped slash in the title', async () => { + const filePath = `small-\\/file-${Math.random()}.txt` + const files = await all(importer([{ + path: filePath, + content: smallFile + }], ipld, options)) + + expect(files.length).to.equal(1) + expect(files[0].path).to.equal(filePath) + }) + + it('small file with square brackets in the title', async () => { + const filePath = `small-[v]-file-${Math.random()}.txt` + const files = await all(importer([{ + path: filePath, + content: smallFile + }], ipld, options)) + + expect(files.length).to.equal(1) + expect(files[0].path).to.equal(filePath) + }) + + it('small file as buffer (smaller than a chunk)', async () => { + const files = await all(importer([{ + path: '200Bytes.txt', + content: smallFile + }], ipld, options)) + + expectFiles(files, [ + '200Bytes.txt' + ]) + }) + + it('small file as array (smaller than a chunk)', async () => { + const files = await all(importer([{ + path: '200Bytes.txt', + content: Array.from(smallFile) + }], ipld, options)) + + expectFiles(files, [ + '200Bytes.txt' + ]) + }) + + it('small file as string (smaller than a chunk)', async () => { + const files = await all(importer([{ + path: 'small.txt', + content: 'this is a file\n' + }], ipld, options)) + + expectFiles(files, [ + 'small.txt' + ]) + }) + + it('small file (smaller than a chunk) with raw leaves', async () => { + const files = await all(importer([{ + path: '200Bytes.txt', + content: smallFile + }], ipld, { + ...options, + rawLeaves: true + })) + + expectFiles(files, [ + '200Bytes.txt with raw leaves' + ]) + }) + + it('small file (smaller than a chunk) inside a dir', async () => { + const files = await all(importer([{ + path: 'foo/bar/200Bytes.txt', + content: smallFile + }], ipld, options)) + + expectFiles(files, [ + 'foo/bar/200Bytes.txt', + 'foo/bar', + 'foo' + ]) + }) + + it('file bigger than a single chunk', async () => { + this.timeout(60 * 1000) + + const files = await all(importer([{ + path: '1.2MiB.txt', + content: bigFile + }], ipld, options)) + + expectFiles(files, [ + '1.2MiB.txt' + ]) + }) + + it('file bigger than a single chunk inside a dir', async () => { + this.timeout(60 * 1000) + + const files = await all(importer([{ + path: 'foo-big/1.2MiB.txt', + content: bigFile + }], ipld, options)) + + expectFiles(files, [ + 'foo-big/1.2MiB.txt', + 'foo-big' + ]) + }) + + it('empty directory', async () => { + const files = await all(importer([{ + path: 'empty-dir' + }], ipld, options)) + + 
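+      // a path given with no content is imported as a directory node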
expectFiles(files, [ + 'empty-dir' + ]) + }) + + it('directory with files', async () => { + const files = await all(importer([{ + path: 'pim/200Bytes.txt', + content: smallFile + }, { + path: 'pim/1.2MiB.txt', + content: bigFile + }], ipld, options)) + + expectFiles(files, [ + 'pim/200Bytes.txt', + 'pim/1.2MiB.txt', + 'pim' + ]) + }) + + it('nested directory (2 levels deep)', async () => { + const files = await all(importer([{ + path: 'pam/pum/200Bytes.txt', + content: smallFile + }, { + path: 'pam/pum/1.2MiB.txt', + content: bigFile + }, { + path: 'pam/1.2MiB.txt', + content: bigFile + }], ipld, options)) + + const result = stringifyMh(files) + + expect(result.length).to.equal(5) + + result.forEach(eachFile) + + function eachFile (file) { + if (file.path === 'pam/pum/200Bytes.txt') { + expect(file.cid).to.equal(expected['200Bytes.txt'].cid) + expect(file.unixfs.fileSize()).to.equal(expected['200Bytes.txt'].size) + } else if (file.path === 'pam/pum/1.2MiB.txt') { + expect(file.cid).to.equal(expected['1.2MiB.txt'].cid) + expect(file.unixfs.fileSize()).to.equal(expected['1.2MiB.txt'].size) + } else if (file.path === 'pam/pum') { + expect(file.cid).to.equal(expected['pam/pum'].cid) + } else if (file.path === 'pam/1.2MiB.txt') { + expect(file.cid).to.equal(expected['1.2MiB.txt'].cid) + expect(file.unixfs.fileSize()).to.equal(expected['1.2MiB.txt'].size) + } else if (file.path === 'pam') { + expect(file.cid).to.equal(expected.pam.cid) + } else { + throw new Error(`Unexpected path ${file.path}`) + } + } + }) + + it('will not write to disk if passed "onlyHash" option', async () => { + const content = String(Math.random() + Date.now()) + const files = await all(importer([{ + path: content + '.txt', + content: Buffer.from(content) + }], ipld, { + onlyHash: true + })) + + const file = files[0] + expect(file).to.exist() + + try { + await ipld.get(file.cid) + + throw new Error('No error was thrown') + } catch (err) { + expect(err.code).to.equal('ERR_NOT_FOUND') + } + }) + + it('will call an optional progress function', async () => { + const maxChunkSize = 2048 + + const options = { + progress: spy(), + maxChunkSize + } + + await all(importer([{ + path: '1.2MiB.txt', + content: bigFile + }], ipld, options)) + + expect(options.progress.called).to.equal(true) + expect(options.progress.args[0][0]).to.equal(maxChunkSize) + }) + + it('will import files with CID version 1', async () => { + const createInputFile = (path, size) => { + const name = String(Math.random() + Date.now()) + path = path[path.length - 1] === '/' ? 
path : path + '/' + return { + path: path + name + '.txt', + content: Buffer.alloc(size).fill(1) + } + } + + const inputFiles = [ + createInputFile('/foo', 10), + createInputFile('/foo', 60), + createInputFile('/foo/bar', 78), + createInputFile('/foo/baz', 200), + // Bigger than maxChunkSize + createInputFile('/foo', 262144 + 45), + createInputFile('/foo/bar', 262144 + 134), + createInputFile('/foo/bar', 262144 + 79), + createInputFile('/foo/bar', 262144 + 876), + createInputFile('/foo/bar', 262144 + 21) + ] + + const options = { + cidVersion: 1, + // Ensures we use DirSharded for the data below + shardSplitThreshold: 3 + } + + // Pass a copy of inputFiles, since the importer mutates them + const files = await all(importer(inputFiles.map(f => Object.assign({}, f)), ipld, options)) + + const file = files[0] + expect(file).to.exist() + + for (let i = 0; i < file.length; i++) { + const file = files[i] + + const cid = file.cid.toV1() + const inputFile = inputFiles.find(f => f.path === file.path) + + // Just check the intermediate directory can be retrieved + if (!inputFile) { + await ipld.get(cid) + } + + // Check the imported content is correct + const node = await exporter(cid, ipld) + const chunks = [] + + for await (const chunk of node.content()) { + chunks.push(chunk) + } + + expect(Buffer.concat(chunks)).to.deep.equal(inputFile.content) + } + }) + + it('imports file with raw leaf nodes when specified', () => { + return checkLeafNodeTypes(ipld, { + leafType: 'raw' + }, 'raw') + }) + + it('imports file with file leaf nodes when specified', () => { + return checkLeafNodeTypes(ipld, { + leafType: 'file' + }, 'file') + }) + + it('reduces file to single node when specified', () => { + return checkNodeLinks(ipld, { + reduceSingleLeafToSelf: true + }, 0) + }) + + it('does not reduce file to single node when overidden by options', () => { + return checkNodeLinks(ipld, { + reduceSingleLeafToSelf: false + }, 1) + }) + + it('uses raw leaf nodes when requested', async () => { + this.timeout(60 * 1000) + + const options = { + rawLeaves: true + } + + for await (const file of importer([{ + path: '1.2MiB.txt', + content: bigFile + }], ipld, options)) { + for await (const { cid } of collectLeafCids(file.cid, ipld)) { + expect(cid.codec).to.be('raw') + expect(cid.version).to.be(1) + } + } + }) + + it('supports passing mtime', async () => { + this.timeout(60 * 1000) + + const options = { + rawLeaves: true + } + const now = new Date() + + for await (const file of importer([{ + path: '1.2MiB.txt', + content: bigFile, + mtime: now + }], ipld, options)) { + const node = await exporter(file.cid, ipld) + + expect(node).to.have.nested.deep.property('unixfs.mtime', dateToTimespec(now)) + } + }) + + it('supports passing mtime for directories', async () => { + this.timeout(60 * 1000) + + const now = new Date() + + const entries = await all(importer([{ + path: '/foo', + mtime: now + }], ipld)) + + const node = await exporter(entries[0].cid, ipld) + expect(node).to.have.nested.deep.property('unixfs.mtime', dateToTimespec(now)) + }) + + it('supports passing metadata for wrapping directories', async () => { + this.timeout(60 * 1000) + + const now = new Date() + const perms = 0o0777 + + const entries = await all(importer([{ + path: '/foo', + mtime: now, + mode: perms + }, { + path: '/foo/bar.txt', + content: bigFile + }], ipld)) + + const nodes = await all(exporter.recursive(entries[entries.length - 1].cid, ipld)) + const node = nodes.filter(node => node.unixfs.type === 'directory').pop() + + if (!node) { + 
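+        // bail out if the exported tree contains no directory node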
expect.fail('no directory found') + } + + expect(node).to.have.nested.deep.property('unixfs.mtime', dateToTimespec(now)) + expect(node).to.have.nested.property('unixfs.mode', perms) + }) + + it('supports passing metadata for intermediate directories', async () => { + this.timeout(60 * 1000) + + const now = new Date() + const perms = 0o0777 + + const entries = await all(importer([{ + path: '/foo/bar', + mtime: now, + mode: perms + }, { + path: '/foo/bar/baz.txt', + content: bigFile + }], ipld)) + + const nodes = await all(exporter.recursive(entries[entries.length - 1].cid, ipld)) + const node = nodes.filter(node => node.unixfs.type === 'directory').pop() + + if (!node) { + expect.fail('no directory found') + } + + expect(node).to.have.nested.deep.property('unixfs.mtime', dateToTimespec(now)) + expect(node).to.have.nested.property('unixfs.mode', perms) + }) + + it('supports passing metadata for out of order intermediate directories', async () => { + this.timeout(60 * 1000) + + const now = new Date() + const perms = 0o0777 + + const entries = await all(importer([{ + path: '/foo/bar/qux.txt', + content: bigFile + }, { + path: '/foo/bar', + mtime: now, + mode: perms + }, { + path: '/foo/quux' + }, { + path: '/foo/bar/baz.txt', + content: bigFile + }], ipld)) + + const nodes = await all(exporter.recursive(entries[entries.length - 1].cid, ipld)) + const node = nodes.filter(node => node.unixfs.type === 'directory' && node.name === 'bar').pop() + + if (!node) { + expect.fail('no directory found') + } + + expect(node).to.have.nested.deep.property('unixfs.mtime', dateToTimespec(now)) + expect(node).to.have.nested.property('unixfs.mode', perms) + }) + + it('supports passing mtime for hamt-sharded-directories', async () => { + this.timeout(60 * 1000) + + const now = new Date() + + const entries = await all(importer([{ + path: '/foo', + mtime: now + }, { + path: '/foo/bar.txt', + content: bigFile + }, { + path: '/foo/baz.txt', + content: bigFile + }, { + path: '/foo/qux' + }], ipld, { + shardSplitThreshold: 0 + })) + + const nodes = await all(exporter.recursive(entries[entries.length - 1].cid, ipld)) + const node = nodes.filter(node => node.unixfs.type === 'hamt-sharded-directory').pop() + + if (!node) { + expect.fail('no hamt-sharded-directory found') + } + + expect(node).to.have.nested.deep.property('unixfs.mtime', dateToTimespec(now)) + }) + + it('supports passing mode', async () => { + this.timeout(60 * 1000) + + const options = { + rawLeaves: true + } + const mode = 0o0111 + + for await (const file of importer([{ + path: '1.2MiB.txt', + content: bigFile, + mode + }], ipld, options)) { + const node = await exporter(file.cid, ipld) + + expect(node).to.have.nested.property('unixfs.mode', mode) + } + }) + + it('supports passing mode for directories', async () => { + this.timeout(60 * 1000) + + const mode = 0o0111 + + const entries = await all(importer([{ + path: '/foo', + mode + }], ipld)) + + const node = await exporter(entries[0].cid, ipld) + expect(node).to.have.nested.property('unixfs.mode', mode) + }) + + it('supports passing different modes for different files', async () => { + this.timeout(60 * 1000) + + const mode1 = 0o0111 + const mode2 = 0o0222 + + const entries = await all(importer([{ + path: '/foo/file1.txt', + content: bigFile, + mode: mode1 + }, { + path: '/foo/file2.txt', + content: bigFile, + mode: mode2 + }], ipld)) + + const node1 = await exporter(entries[0].cid, ipld) + expect(node1).to.have.nested.property('unixfs.mode', mode1) + + const node2 = await exporter(entries[1].cid, ipld) 
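+      // each file keeps the mode it was imported with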
+ expect(node2).to.have.nested.property('unixfs.mode', mode2) + }) + + it('supports deeply nested files do not inherit custom metadata', async () => { + this.timeout(60 * 1000) + + const mode = 0o0111 + + const entries = await all(importer([{ + path: '/foo/file1.txt', + content: bigFile, + mode: mode + }, { + path: '/foo/bar/baz/file2.txt', + content: bigFile + }], ipld)) + + const node1 = await exporter(entries[0].cid, ipld) + expect(node1).to.have.nested.property('unixfs.mode', mode) + + const node2 = await exporter(entries[1].cid, ipld) + expect(node2).to.have.nested.property('unixfs.mode').that.does.not.equal(mode) + }) + + it('files and directories get default mode if not specified', async () => { + this.timeout(60 * 1000) + + const entries = await all(importer([{ + path: '/foo/file1.txt', + content: bigFile + }], ipld)) + + const node1 = await exporter(entries[0].cid, ipld) + expect(node1).to.have.nested.property('unixfs.mode', 0o0644) + + const node2 = await exporter(entries[1].cid, ipld) + expect(node2).to.have.nested.property('unixfs.mode', 0o0755) + }) + }) +}) + +describe('configuration', () => { + it('alllows configuring with custom dag and tree builder', async () => { + let builtTree = false + const ipld = 'ipld' + const entries = await all(importer([{ + path: 'path', + content: 'content' + }], ipld, { + dagBuilder: async function * (source, ipld, opts) { // eslint-disable-line require-await + yield function () { + return Promise.resolve({ + cid: 'cid', + path: 'path', + unixfs: 'unixfs' + }) + } + }, + treeBuilder: async function * (source, ipld, opts) { // eslint-disable-line require-await + builtTree = true + yield * source + } + })) + + expect(entries).to.have.lengthOf(1) + expect(entries).to.have.nested.property('[0].cid', 'cid') + expect(entries).to.have.nested.property('[0].path', 'path') + expect(entries).to.have.nested.property('[0].unixfs', 'unixfs') + + expect(builtTree).to.be.true() + }) + + it('alllows configuring with custom chunker', async () => { + let validated = false + let chunked = false + const ipld = { + put: () => 'cid' + } + const entries = await all(importer([{ + path: 'path', + content: 'content' + }], ipld, { + chunkValidator: async function * (source, opts) { // eslint-disable-line require-await + validated = true + yield * source + }, + chunker: async function * (source, opts) { // eslint-disable-line require-await + chunked = true + yield * source + } + })) + + expect(entries).to.have.lengthOf(1) + expect(entries).to.have.nested.property('[0].cid', 'cid') + expect(entries).to.have.nested.property('[0].path', 'path') + expect(entries).to.have.nested.property('[0].unixfs') + + expect(validated).to.be.true() + expect(chunked).to.be.true() + }) +}) diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/2B/CIQCLHFOKW5OR6TMLOEULA42ZCNIUH5AGNU7R5OCASITUKITSBP22BA.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/2B/CIQCLHFOKW5OR6TMLOEULA42ZCNIUH5AGNU7R5OCASITUKITSBP22BA.data new file mode 100644 index 00000000..70641f44 --- /dev/null +++ b/packages/ipfs-unixfs-importer/test/test-repo/blocks/2B/CIQCLHFOKW5OR6TMLOEULA42ZCNIUH5AGNU7R5OCASITUKITSBP22BA.data @@ -0,0 +1,5 @@ + +wxxM{ +DzH/&^ RS/v,R +=Ng~pf1\[>%U1@Q׀2&m6qQ؁]|!K E~J ֕읝ojbn3eT)D+;s +컓:Ty!c3\*T7E?[Pv}A+cx~e \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/2F/CIQEUWUVLBXVFYSYCHHSCRTXCYHGIOBXKWUMKFR3UPAFHQ5WK5362FQ.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/2F/CIQEUWUVLBXVFYSYCHHSCRTXCYHGIOBXKWUMKFR3UPAFHQ5WK5362FQ.data 
new file mode 100644 index 00000000..41456196 --- /dev/null +++ b/packages/ipfs-unixfs-importer/test/test-repo/blocks/2F/CIQEUWUVLBXVFYSYCHHSCRTXCYHGIOBXKWUMKFR3UPAFHQ5WK5362FQ.data @@ -0,0 +1,4 @@ + +ys# js-ipfs-repo +Implementation of the IPFS repo spec (https://github.com/ipfs/specs/tree/master/repo) in JavaScript +s \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/2P/CIQLCBXFJEBKPWASINYRB5OBXDOZ3PBXLDJQNRVNYD7HUBGVZD6T2PA.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/2P/CIQLCBXFJEBKPWASINYRB5OBXDOZ3PBXLDJQNRVNYD7HUBGVZD6T2PA.data new file mode 100644 index 00000000..ce734230 --- /dev/null +++ b/packages/ipfs-unixfs-importer/test/test-repo/blocks/2P/CIQLCBXFJEBKPWASINYRB5OBXDOZ3PBXLDJQNRVNYD7HUBGVZD6T2PA.data @@ -0,0 +1,4 @@ +4 +" si"¹W` +- browsers or extensions can learn to use `ipfs://` directly +- hash-addressed content guarantees authenticity + +IPFS is modular: +- connection layer over any network protocol +- routing layer +- uses a routing layer DHT (kademlia/coral) +- uses a path-based naming service +- uses bittorrent-inspired block exchange + +IPFS uses crypto: +- cryptographic-hash content addressing +- block-level deduplication +- file integrity + versioning +- filesystem-level encryption + signing support + +IPFS is p2p: +- worldwide peer-to-peer file transfers +- completely decentralized architecture +- **no** central point of failure + +IPFS is a cdn: +- add a file to the filesystem locally, and it's now available to the world +- caching-friendly (content-hash naming) +- bittorrent-based bandwidth distribution + +IPFS has a name service: +- IPNS, an SFS inspired name system +- global namespace based on PKI +- serves to build trust chains +- compatible with other NSes +- can map DNS, .onion, .bit, etc to IPNS + \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/7R/CIQGNXYJ6NHQTTNWY7E7MLOVZD5BRFMJL3H27REITBBUWURIKQTP7RQ.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/7R/CIQGNXYJ6NHQTTNWY7E7MLOVZD5BRFMJL3H27REITBBUWURIKQTP7RQ.data new file mode 100644 index 00000000..42c502e2 Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/7R/CIQGNXYJ6NHQTTNWY7E7MLOVZD5BRFMJL3H27REITBBUWURIKQTP7RQ.data differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/A3/CIQD76D3PRB4H6QE6C7DJX224YXYFEKLENPHGHEJPQZ723Z4L4IUA3I.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/A3/CIQD76D3PRB4H6QE6C7DJX224YXYFEKLENPHGHEJPQZ723Z4L4IUA3I.data new file mode 100644 index 00000000..46fecabf --- /dev/null +++ b/packages/ipfs-unixfs-importer/test/test-repo/blocks/A3/CIQD76D3PRB4H6QE6C7DJX224YXYFEKLENPHGHEJPQZ723Z4L4IUA3I.data @@ -0,0 +1,2 @@ + +Q \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/AD/CIQAC3NNL5OBTUOK2A4CXXDS5Y6DLLCSC26OJVKNQ5IVDXLHGM5HADA.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/AD/CIQAC3NNL5OBTUOK2A4CXXDS5Y6DLLCSC26OJVKNQ5IVDXLHGM5HADA.data new file mode 100644 index 00000000..1379fd9c Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/AD/CIQAC3NNL5OBTUOK2A4CXXDS5Y6DLLCSC26OJVKNQ5IVDXLHGM5HADA.data differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/AD/CIQDKCWPMK632NCNXGUY4TT465TRBMZJ5XRELAX655W3OS5K7ZHVADI.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/AD/CIQDKCWPMK632NCNXGUY4TT465TRBMZJ5XRELAX655W3OS5K7ZHVADI.data new file mode 100644 index 00000000..ee87b15f --- /dev/null +++ 
b/packages/ipfs-unixfs-importer/test/test-repo/blocks/AD/CIQDKCWPMK632NCNXGUY4TT465TRBMZJ5XRELAX655W3OS5K7ZHVADI.data @@ -0,0 +1,1452 @@ + +l systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. 
+ + + + \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/AE/CIQONICFQZH7QVU6IPSIM3AK7AD554D3BWZPAGEAQYQOWMFZQDUUAEI.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/AE/CIQONICFQZH7QVU6IPSIM3AK7AD554D3BWZPAGEAQYQOWMFZQDUUAEI.data new file mode 100644 index 00000000..6860441a --- /dev/null +++ b/packages/ipfs-unixfs-importer/test/test-repo/blocks/AE/CIQONICFQZH7QVU6IPSIM3AK7AD554D3BWZPAGEAQYQOWMFZQDUUAEI.data @@ -0,0 +1,3 @@ +/ +" gq6\u8~:6~gZ.directT2 +" 6(%݄.Ӿ5(ab recursiveT \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/AM/CIQHG2O56243WIVNYK4VP2Z4U73KN42HPSC3MZPSDW2QQNAJD6ICAMY.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/AM/CIQHG2O56243WIVNYK4VP2Z4U73KN42HPSC3MZPSDW2QQNAJD6ICAMY.data new file mode 100644 index 00000000..f57749f0 Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/AM/CIQHG2O56243WIVNYK4VP2Z4U73KN42HPSC3MZPSDW2QQNAJD6ICAMY.data differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/AM/CIQLILAMXCIV5B2ONEE63TLHVKS52D52O77I52JGFOH7U3ACVOKNAMY.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/AM/CIQLILAMXCIV5B2ONEE63TLHVKS52D52O77I52JGFOH7U3ACVOKNAMY.data new file mode 100644 index 00000000..6a0cbe82 --- /dev/null +++ b/packages/ipfs-unixfs-importer/test/test-repo/blocks/AM/CIQLILAMXCIV5B2ONEE63TLHVKS52D52O77I52JGFOH7U3ACVOKNAMY.data @@ -0,0 +1,3 @@ + + +Q diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/AP/CIQHAKDLTL5GMIFGN5YVY4BA22FPHUIODJEXS4LCTQDWA275XAJDAPI.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/AP/CIQHAKDLTL5GMIFGN5YVY4BA22FPHUIODJEXS4LCTQDWA275XAJDAPI.data new file mode 100644 index 00000000..74de75af Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/AP/CIQHAKDLTL5GMIFGN5YVY4BA22FPHUIODJEXS4LCTQDWA275XAJDAPI.data differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/AU/CIQHESGWL6ZUVT4RKZS6V2ZJ4IDV7RR6M6FQFAOFWXOJYNVRTHTMAUI.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/AU/CIQHESGWL6ZUVT4RKZS6V2ZJ4IDV7RR6M6FQFAOFWXOJYNVRTHTMAUI.data new file mode 100644 index 00000000..f4c039c2 Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/AU/CIQHESGWL6ZUVT4RKZS6V2ZJ4IDV7RR6M6FQFAOFWXOJYNVRTHTMAUI.data differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/AY/CIQGCTOPJA57F6YZZAVSFL44RVD2GWTNJ7OPQXRT3NG7NGD633QHAYY.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/AY/CIQGCTOPJA57F6YZZAVSFL44RVD2GWTNJ7OPQXRT3NG7NGD633QHAYY.data new file mode 100644 index 00000000..8eb2a515 Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/AY/CIQGCTOPJA57F6YZZAVSFL44RVD2GWTNJ7OPQXRT3NG7NGD633QHAYY.data differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/BY/CIQGTEDX5RFSVBJ7FQAHSZGMC5HOO4XG5GZAZRZ5EA43NKKQEJXWBYY.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/BY/CIQGTEDX5RFSVBJ7FQAHSZGMC5HOO4XG5GZAZRZ5EA43NKKQEJXWBYY.data new file mode 100644 index 00000000..a9c1c069 Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/BY/CIQGTEDX5RFSVBJ7FQAHSZGMC5HOO4XG5GZAZRZ5EA43NKKQEJXWBYY.data differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/C2/CIQK5PD7SAKEC7TPLYXS3EZQRAVYTFPR5ZY6HTI7DDQAG6AZQOXIC2I.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/C2/CIQK5PD7SAKEC7TPLYXS3EZQRAVYTFPR5ZY6HTI7DDQAG6AZQOXIC2I.data new file mode 100644 index 00000000..1067edb4 --- /dev/null +++ 
b/packages/ipfs-unixfs-importer/test/test-repo/blocks/C2/CIQK5PD7SAKEC7TPLYXS3EZQRAVYTFPR5ZY6HTI7DDQAG6AZQOXIC2I.data @@ -0,0 +1,6 @@ + + +wxxM{ +DzH/&^ RS/v,R +=Ng~pf1\[>%U1@Q׀2&m6qQ؁]|!K E~J ֕읝ojbn3eT)D+;s +컓:Ty!c3\*T7E?[Pv}A+cx~e \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/C3/CIQHS67KEOPN356BDJDRIMDCS5NQBMLCVMUGUAM2BWZNUWV7WZ7FC3Q.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/C3/CIQHS67KEOPN356BDJDRIMDCS5NQBMLCVMUGUAM2BWZNUWV7WZ7FC3Q.data new file mode 100644 index 00000000..4741988d --- /dev/null +++ b/packages/ipfs-unixfs-importer/test/test-repo/blocks/C3/CIQHS67KEOPN356BDJDRIMDCS5NQBMLCVMUGUAM2BWZNUWV7WZ7FC3Q.data @@ -0,0 +1,4729 @@ + +e academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. 
+Git, the distributed source code version control system, developed many useful ways to model and implement distributed data operations. The Git toolchain offers versatile versioning functionality that large file distribution systems severely lack. New solutions inspired by Git are emerging, such as Camlistore [?], a personal file storage system, and Dat [?], a data collaboration toolchain and dataset package manager. Git has already influenced distributed filesystem design [9], as its content-addressed Merkle DAG data model enables powerful file distribution strategies. What remains to be explored is how this data structure can influence the design of high-throughput oriented file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer version-controlled filesystem seeking to reconcile these issues. IPFS synthesizes learnings from many past successful systems. Careful interface-focused integration yields a system greater than the sum of its parts. The central IPFS principle is modeling all data as part of the same Merkle DAG.
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
\ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/C3/CIQLFQWZO45IGFSCQEDJKOZCAYNHTNVD6NVXVPWWBHATU2YG4E6HC3A.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/C3/CIQLFQWZO45IGFSCQEDJKOZCAYNHTNVD6NVXVPWWBHATU2YG4E6HC3A.data new file mode 100644 index 00000000..df20559d Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/C3/CIQLFQWZO45IGFSCQEDJKOZCAYNHTNVD6NVXVPWWBHATU2YG4E6HC3A.data differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/C4/CIQDDZ5EDQK5AP7LRTLZHQZUR2R3GECRFV3WPKNL7PL2SKFIL2LXC4Y.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/C4/CIQDDZ5EDQK5AP7LRTLZHQZUR2R3GECRFV3WPKNL7PL2SKFIL2LXC4Y.data new file mode 100644 index 00000000..ecce1053 --- /dev/null +++ b/packages/ipfs-unixfs-importer/test/test-repo/blocks/C4/CIQDDZ5EDQK5AP7LRTLZHQZUR2R3GECRFV3WPKNL7PL2SKFIL2LXC4Y.data @@ -0,0 +1,4 @@ +5 +" ׾F_uؔlzS?|ڲPc@ js-ipfs-repo + + \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/CB/CIQGXO5AO2FIISVRDFHFQ5W7W3HEHGNS5KD2JNYYZGCQ3NQPV22QCBI.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/CB/CIQGXO5AO2FIISVRDFHFQ5W7W3HEHGNS5KD2JNYYZGCQ3NQPV22QCBI.data new file mode 100644 index 00000000..96566028 --- /dev/null +++ b/packages/ipfs-unixfs-importer/test/test-repo/blocks/CB/CIQGXO5AO2FIISVRDFHFQ5W7W3HEHGNS5KD2JNYYZGCQ3NQPV22QCBI.data @@ -0,0 +1,6 @@ + +5 +" $G,A4{xZ/.D` 200Bytes.txt3 +" Y9_)a˹2RmŖke9 dir-another0 +" Ty5 ;_9YfqFLhyl/level-1 + \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/CJ/CIQLMFYJNAZ2H33DVIPIS3A7S2WIMHBY62BY5OFCIR2NVHZUHT7PCJY.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/CJ/CIQLMFYJNAZ2H33DVIPIS3A7S2WIMHBY62BY5OFCIR2NVHZUHT7PCJY.data new file mode 100644 index 00000000..fa45ee79 Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/CJ/CIQLMFYJNAZ2H33DVIPIS3A7S2WIMHBY62BY5OFCIR2NVHZUHT7PCJY.data differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/CY/CIQDMKFEUGKSLXMEXO774EZOYCYNHPRVFD53ZSAU7237F67XDSQGCYQ.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/CY/CIQDMKFEUGKSLXMEXO774EZOYCYNHPRVFD53ZSAU7237F67XDSQGCYQ.data new file mode 100644 index 00000000..bbe6bda7 Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/CY/CIQDMKFEUGKSLXMEXO774EZOYCYNHPRVFD53ZSAU7237F67XDSQGCYQ.data differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/DJ/CIQCKYVVKKTZ5H7RDBHOBR72KDZZ7Y4BAVSMNITBWIGNAQQFG4UUDJQ.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/DJ/CIQCKYVVKKTZ5H7RDBHOBR72KDZZ7Y4BAVSMNITBWIGNAQQFG4UUDJQ.data new file mode 100644 index 00000000..b99ceb21 --- /dev/null +++ b/packages/ipfs-unixfs-importer/test/test-repo/blocks/DJ/CIQCKYVVKKTZ5H7RDBHOBR72KDZZ7Y4BAVSMNITBWIGNAQQFG4UUDJQ.data @@ -0,0 +1,3 @@ +, +" `u>/2l ilfYB'M%bar + \ No newline at end of file
--git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/DZ/CIQH7E5UPL3DQGEXQXDT2QKNMU2GRLW43TH7QFHITLDVLJKQFZYEDZI.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/DZ/CIQH7E5UPL3DQGEXQXDT2QKNMU2GRLW43TH7QFHITLDVLJKQFZYEDZI.data new file mode 100644 index 00000000..be380799 --- /dev/null +++ b/packages/ipfs-unixfs-importer/test/test-repo/blocks/DZ/CIQH7E5UPL3DQGEXQXDT2QKNMU2GRLW43TH7QFHITLDVLJKQFZYEDZI.data @@ -0,0 +1,4730 @@

There have been many attempts at constructing a global distributed file system. Some systems have seen significant success, and others failed completely. Among the academic attempts, AFS [6] has succeeded widely and is still in use today. Others [7, ?] have not attained the same success. Outside of academia, the most successful systems have been peer-to-peer file-sharing applications primarily geared toward large media (audio and video). Most notably, Napster, KaZaA, and BitTorrent [2] deployed large file distribution systems supporting over 100 million simultaneous users. Even today, BitTorrent maintains a massive deployment where tens of millions of nodes churn daily [16]. These applications saw greater numbers of users and files distributed than their academic file system counterparts. However, the applications were not designed as infrastructure to be built upon. While there have been successful repurposings¹, no general file system has emerged that offers global, low-latency, and decentralized distribution.

Perhaps this is because a “good enough” system for most use cases already exists: HTTP. By far, HTTP is the most successful “distributed system of files” ever deployed. Coupled with the browser, HTTP has had enormous technical and social impact. It has become the de facto way to transmit files across the internet. Yet, it fails to take advantage of dozens of brilliant file distribution techniques invented in the last fifteen years. From one perspective, evolving Web infrastructure is near-impossible, given the number of backwards compatibility constraints and the number of strong parties invested in the current model. But from another perspective, new protocols have emerged and gained wide use since the emergence of HTTP. What is lacking is upgrading design: enhancing the current HTTP web, and introducing new functionality without degrading user experience.

Industry has gotten away with using HTTP this long because moving small files around is relatively cheap, even for small organizations with lots of traffic. But we are entering a new era of data distribution with new challenges: (a) hosting and distributing petabyte datasets, (b) computing on large data across organizations, (c) high-volume high-definition on-demand or real-time media streams, (d) versioning and linking of massive datasets, (e) preventing accidental disappearance of important files, and more. Many of these can be boiled down to “lots of data, accessible everywhere.” Pressed by critical features and bandwidth concerns, we have already given up HTTP for different data distribution protocols. The next step is making them part of the Web itself.

Orthogonal to efficient data distribution, version control systems have managed to develop important data collaboration workflows. Git, the distributed source code version control system, developed many useful ways to model and implement distributed data operations. The Git toolchain offers versatile versioning functionality that large file distribution systems severely lack. New solutions inspired by Git are emerging, such as Camlistore [?], a personal file storage system, and Dat [?], a data collaboration toolchain and dataset package manager. Git has already influenced distributed filesystem design [9], as its content-addressed Merkle DAG data model enables powerful file distribution strategies. What remains to be explored is how this data structure can influence the design of high-throughput-oriented file systems, and how it might upgrade the Web itself.

This paper introduces IPFS, a novel peer-to-peer version-controlled filesystem seeking to reconcile these issues. IPFS synthesizes learnings from many past successful systems. Careful interface-focused integration yields a system greater than the sum of its parts. The central IPFS principle is modeling all data as part of the same Merkle DAG.

¹ For example, Linux distributions use BitTorrent to transmit disk images, and Blizzard, Inc. uses it to distribute video game content.
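The content-addressed Merkle DAG mentioned above is the idea that carries over most directly into IPFS: a node's identifier is the hash of its serialized bytes, and links reference other nodes by that hash, so identical content always maps to the same identifier. The snippet below is only a minimal sketch of that idea, not the IPFS or dag-pb implementation; the putNode/getNode helpers, the in-memory Map blockstore, and the JSON encoding are assumptions made purely for illustration (real IPFS uses CIDs, multihashes, and protobuf-encoded nodes).

'use strict'

// Minimal content-addressing sketch (illustration only, not the IPFS wire format).
// A node is identified by the SHA-256 of its serialized bytes; links hold those ids.
const crypto = require('crypto')

const blockstore = new Map() // id -> serialized node bytes

function putNode (node) {
  const bytes = Buffer.from(JSON.stringify(node)) // real IPFS encodes dag-pb, not JSON
  const id = crypto.createHash('sha256').update(bytes).digest('hex')
  blockstore.set(id, bytes)
  return id
}

function getNode (id) {
  const bytes = blockstore.get(id)
  return bytes ? JSON.parse(bytes.toString()) : undefined
}

// Leaves hold data; the parent links to them by hash, forming a Merkle DAG.
const leafA = putNode({ data: 'hello ' })
const leafB = putNode({ data: 'world' })
const root = putNode({ links: [leafA, leafB] })

console.log(root)                // deterministic: same content -> same id
console.log(getNode(root).links) // [leafA, leafB]

Because the root id commits to every leaf hash, any peer can hand back the blocks and the receiver can verify them locally, which is the property that makes content-addressed file distribution attractive.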
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
Outside of academia, the most successfu \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/DZ/CIQL7TG2PB52XIZLLHDYIUFMHUQLMMZWBNBZSLDXFCPZ5VDNQQ2WDZQ.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/DZ/CIQL7TG2PB52XIZLLHDYIUFMHUQLMMZWBNBZSLDXFCPZ5VDNQQ2WDZQ.data new file mode 100644 index 00000000..508cff2e Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/DZ/CIQL7TG2PB52XIZLLHDYIUFMHUQLMMZWBNBZSLDXFCPZ5VDNQQ2WDZQ.data differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/E7/CIQB3KY36M7HMZI5BLRPMPN7LOPHD2LZWNGBZR5BTOBHNWGBDFTAE7A.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/E7/CIQB3KY36M7HMZI5BLRPMPN7LOPHD2LZWNGBZR5BTOBHNWGBDFTAE7A.data new file mode 100644 index 00000000..0b520379 Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/E7/CIQB3KY36M7HMZI5BLRPMPN7LOPHD2LZWNGBZR5BTOBHNWGBDFTAE7A.data differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/EN/CIQCRUFL6YJQJMIKI6BX7HZT3BZQJV45EDKQXCYAUET3RJH5DDU3ENY.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/EN/CIQCRUFL6YJQJMIKI6BX7HZT3BZQJV45EDKQXCYAUET3RJH5DDU3ENY.data new file mode 100644 index 00000000..e705b9b0 Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/EN/CIQCRUFL6YJQJMIKI6BX7HZT3BZQJV45EDKQXCYAUET3RJH5DDU3ENY.data differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/EV/CIQJ5DVL3AD53EYMWBQG7MMACDUYWJDJ57AS6VIHPMOWJZEFZDUBEVI.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/EV/CIQJ5DVL3AD53EYMWBQG7MMACDUYWJDJ57AS6VIHPMOWJZEFZDUBEVI.data new file mode 100644 index 00000000..725a9b22 --- /dev/null +++ b/packages/ipfs-unixfs-importer/test/test-repo/blocks/EV/CIQJ5DVL3AD53EYMWBQG7MMACDUYWJDJ57AS6VIHPMOWJZEFZDUBEVI.data @@ -0,0 +1,5 @@ + +@:4 +" si"¹W\IzxEElM/fLICENSE1 +" JZXoRX!Fwd87U;SöWw README.md{ + \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/HD/CIQDDVW2EZIJF4NQH7WJNESD7XHQSXA5EGJVNTPVHD7444C2KLKXHDI.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/HD/CIQDDVW2EZIJF4NQH7WJNESD7XHQSXA5EGJVNTPVHD7444C2KLKXHDI.data new file mode 100644 index 00000000..5ea0edda Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/HD/CIQDDVW2EZIJF4NQH7WJNESD7XHQSXA5EGJVNTPVHD7444C2KLKXHDI.data differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/HD/CIQHILW6C2T5CZPUWAAOL5AJJEZ3MHJ5BNX7WPFW5RNT53JSYDZKHDY.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/HD/CIQHILW6C2T5CZPUWAAOL5AJJEZ3MHJ5BNX7WPFW5RNT53JSYDZKHDY.data new file mode 100644 index 00000000..e845c839 Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/HD/CIQHILW6C2T5CZPUWAAOL5AJJEZ3MHJ5BNX7WPFW5RNT53JSYDZKHDY.data differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/HV/CIQOSORZMMBDPROY2NYNBLJRYMPGU23M5WLZD7BMT7RIF7RFJEOHHVY.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/HV/CIQOSORZMMBDPROY2NYNBLJRYMPGU23M5WLZD7BMT7RIF7RFJEOHHVY.data new file mode 100644 index 00000000..4eb5d7bf --- /dev/null +++ b/packages/ipfs-unixfs-importer/test/test-repo/blocks/HV/CIQOSORZMMBDPROY2NYNBLJRYMPGU23M5WLZD7BMT7RIF7RFJEOHHVY.data @@ -0,0 +1,4 @@ + +A;5 +" $G,A4{xZ/.D` 200Bytes.txt +; \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/HV/CIQPPIDDACRG4OUFQJQ7HWNALKTF6V5TVUZG34DEHJWZ2CFADQEQHVY.data 
b/packages/ipfs-unixfs-importer/test/test-repo/blocks/HV/CIQPPIDDACRG4OUFQJQ7HWNALKTF6V5TVUZG34DEHJWZ2CFADQEQHVY.data new file mode 100644 index 00000000..a762644a Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/HV/CIQPPIDDACRG4OUFQJQ7HWNALKTF6V5TVUZG34DEHJWZ2CFADQEQHVY.data differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/I2/CIQMRL3OZOQ6FWVCILNBKQYHSKHGOHWXORGFUFDU6Z3SFI6MWC7CI2I.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/I2/CIQMRL3OZOQ6FWVCILNBKQYHSKHGOHWXORGFUFDU6Z3SFI6MWC7CI2I.data new file mode 100644 index 00000000..8e5a1d76 Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/I2/CIQMRL3OZOQ6FWVCILNBKQYHSKHGOHWXORGFUFDU6Z3SFI6MWC7CI2I.data differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/IB/CIQMYGTLMBSYWR7X6Z676GQTL3W5NOSHG2QSNMWMASRY47R6DISDIBY.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/IB/CIQMYGTLMBSYWR7X6Z676GQTL3W5NOSHG2QSNMWMASRY47R6DISDIBY.data new file mode 100644 index 00000000..5b090964 Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/IB/CIQMYGTLMBSYWR7X6Z676GQTL3W5NOSHG2QSNMWMASRY47R6DISDIBY.data differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/IH/CIQA44S3C5B67N6QBLHMMHVPPOUE7L6CUBCDZVQGGAOYAGF3PHL3IHQ.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/IH/CIQA44S3C5B67N6QBLHMMHVPPOUE7L6CUBCDZVQGGAOYAGF3PHL3IHQ.data new file mode 100644 index 00000000..f9810363 --- /dev/null +++ b/packages/ipfs-unixfs-importer/test/test-repo/blocks/IH/CIQA44S3C5B67N6QBLHMMHVPPOUE7L6CUBCDZVQGGAOYAGF3PHL3IHQ.data @@ -0,0 +1,4728 @@
There have been many attempts at constructing a global distributed file system. Some systems have seen significant success, and others failed completely. Among the academic attempts, AFS [6] has succeeded widely and is still in use today. Others [7, ?] have not attained the same success. Outside of academia, the most successful systems have been peer-to-peer file-sharing applications primarily geared toward large media (audio and video). Most notably, Napster, KaZaA, and BitTorrent [2] deployed large file distribution systems supporting over 100 million simultaneous users. Even today, BitTorrent maintains a massive deployment where tens of millions of nodes churn daily [16]. These applications saw greater numbers of users and files distributed than their academic file system counterparts. However, the applications were not designed as infrastructure to be built upon. While there have been successful repurposings¹, no general file-system has emerged that offers global, low-latency, and decentralized distribution.

Perhaps this is because a “good enough” system for most use cases already exists: HTTP. By far, HTTP is the most successful “distributed system of files” ever deployed. Coupled with the browser, HTTP has had enormous technical and social impact. It has become the de facto way to transmit files across the internet. Yet, it fails to take advantage of dozens of brilliant file distribution techniques invented in the last fifteen years. From one perspective, evolving Web infrastructure is near-impossible, given the number of backwards compatibility constraints and the number of strong parties invested in the current model. But from another perspective, new protocols have emerged and gained wide use since the emergence of HTTP. What is lacking is upgrading design: enhancing the current HTTP web, and introducing new functionality without degrading user experience.

Industry has gotten away with using HTTP this long because moving small files around is relatively cheap, even for small organizations with lots of traffic. But we are entering a new era of data distribution with new challenges: (a) hosting and distributing petabyte datasets, (b) computing on large data across organizations, (c) high-volume high-definition on-demand or real-time media streams, (d) versioning and linking of massive datasets, (e) preventing accidental disappearance of important files, and more. Many of these can be boiled down to “lots of data, accessible everywhere.” Pressed by critical features and bandwidth concerns, we have already given up HTTP for different data distribution protocols. The next step is making them part of the Web itself.

Orthogonal to efficient data distribution, version control systems have managed to develop important data collaboration workflows. Git, the distributed source code version control system, developed many useful ways to model and implement distributed data operations. The Git toolchain offers versatile versioning functionality that large file distribution systems severely lack. New solutions inspired by Git are emerging, such as Camlistore [?], a personal file storage system, and Dat [?], a data collaboration toolchain and dataset package manager. Git has already influenced distributed filesystem design [9], as its content-addressed Merkle DAG data model enables powerful file distribution strategies. What remains to be explored is how this data structure can influence the design of high-throughput-oriented file systems, and how it might upgrade the Web itself.

This paper introduces IPFS, a novel peer-to-peer version-controlled filesystem seeking to reconcile these issues. IPFS synthesizes learnings from many past successful systems. Careful interface-focused integration yields a system greater than the sum of its parts. The central IPFS principle is modeling all data as part of the same Merkle DAG.

¹ For example, Linux distributions use BitTorrent to transmit disk images, and Blizzard, Inc. uses it to distribute video game content.
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. +There have been many attempts at constructing a global +distributed file system. Some \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/IL/CIQJFGRQHQ45VCQLM7AJNF2GF5UHUAGGHC6LLAH6VYDEKLQMD4QLILY.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/IL/CIQJFGRQHQ45VCQLM7AJNF2GF5UHUAGGHC6LLAH6VYDEKLQMD4QLILY.data new file mode 100644 index 00000000..62d1c297 --- /dev/null +++ b/packages/ipfs-unixfs-importer/test/test-repo/blocks/IL/CIQJFGRQHQ45VCQLM7AJNF2GF5UHUAGGHC6LLAH6VYDEKLQMD4QLILY.data @@ -0,0 +1,8 @@ + +Come hang out in our IRC chat room if you have any questions. + +Contact the ipfs dev team: +- Bugs: https://github.com/ipfs/go-ipfs/issues +- Help: irc.freenode.org/#ipfs +- Email: dev@ipfs.io + \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/J3/CIQCUWNWXF4BLH7JZBCXMVGYBS5FJPGJ6W6SP2WIZ4K7PJVNE4IXJ3I.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/J3/CIQCUWNWXF4BLH7JZBCXMVGYBS5FJPGJ6W6SP2WIZ4K7PJVNE4IXJ3I.data new file mode 100644 index 00000000..00360cfb --- /dev/null +++ b/packages/ipfs-unixfs-importer/test/test-repo/blocks/J3/CIQCUWNWXF4BLH7JZBCXMVGYBS5FJPGJ6W6SP2WIZ4K7PJVNE4IXJ3I.data @@ -0,0 +1,3 @@ +4 +" UFrnb⇾?|< test-data + \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/KB/CIQFDNVHVCWCRUXO3W7UGOPXRQBCBQYCHLVLZGSOLCJH6MH53TULKBI.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/KB/CIQFDNVHVCWCRUXO3W7UGOPXRQBCBQYCHLVLZGSOLCJH6MH53TULKBI.data new file mode 100644 index 00000000..026ac913 Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/KB/CIQFDNVHVCWCRUXO3W7UGOPXRQBCBQYCHLVLZGSOLCJH6MH53TULKBI.data differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/KL/CIQI55DVBRTRLGOEFG3NSUVUVDC5FV3BQJ6BDG6TQLX5ZMDXH3CDKLY.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/KL/CIQI55DVBRTRLGOEFG3NSUVUVDC5FV3BQJ6BDG6TQLX5ZMDXH3CDKLY.data new file mode 100644 index 00000000..7c40850f Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/KL/CIQI55DVBRTRLGOEFG3NSUVUVDC5FV3BQJ6BDG6TQLX5ZMDXH3CDKLY.data differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/KN/CIQC2GMPEBY4FY6LK3KAVQ6KPD2RFEURQB672H4QQPHGWO7HJZAIKNA.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/KN/CIQC2GMPEBY4FY6LK3KAVQ6KPD2RFEURQB672H4QQPHGWO7HJZAIKNA.data new file mode 100644 index 00000000..912b64e0 Binary files /dev/null and 
b/packages/ipfs-unixfs-importer/test/test-repo/blocks/KN/CIQC2GMPEBY4FY6LK3KAVQ6KPD2RFEURQB672H4QQPHGWO7HJZAIKNA.data differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/KV/CIQEB4J2WBPZIHHIIG4LFC3VPCUL7IRICU6DOD4BWS6GFOQNMZSAKVI.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/KV/CIQEB4J2WBPZIHHIIG4LFC3VPCUL7IRICU6DOD4BWS6GFOQNMZSAKVI.data new file mode 100644 index 00000000..9f1e7af6 Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/KV/CIQEB4J2WBPZIHHIIG4LFC3VPCUL7IRICU6DOD4BWS6GFOQNMZSAKVI.data differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/LA/CIQOK7LUYNC5GJDWTV6SEZ4ZJJ64QAMOZ7DFV5GWMJMTD3PN73HPLAY.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/LA/CIQOK7LUYNC5GJDWTV6SEZ4ZJJ64QAMOZ7DFV5GWMJMTD3PN73HPLAY.data new file mode 100644 index 00000000..dcd69d0b Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/LA/CIQOK7LUYNC5GJDWTV6SEZ4ZJJ64QAMOZ7DFV5GWMJMTD3PN73HPLAY.data differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/LG/CIQJBQD2O6K4CGJVCCTJNUP57QHR4SKHZ74OIITBBGLOMCO3ZOLWLGA.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/LG/CIQJBQD2O6K4CGJVCCTJNUP57QHR4SKHZ74OIITBBGLOMCO3ZOLWLGA.data new file mode 100644 index 00000000..71be805f --- /dev/null +++ b/packages/ipfs-unixfs-importer/test/test-repo/blocks/LG/CIQJBQD2O6K4CGJVCCTJNUP57QHR4SKHZ74OIITBBGLOMCO3ZOLWLGA.data @@ -0,0 +1,9 @@ + +Some helpful resources for finding your way around ipfs: + +- quick-start: a quick show of various ipfs features. +- ipfs commands: a list of all commands +- ipfs --help: every command describes itself +- https://github.com/ipfs/go-ipfs -- the src repository +- #ipfs on irc.freenode.org -- the community irc channel + \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/LM/CIQIFHHL6JGE5AT5CX66FZUHQKFZ3F5J4HDTDEDSFIFRZJOOHYHYLMI.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/LM/CIQIFHHL6JGE5AT5CX66FZUHQKFZ3F5J4HDTDEDSFIFRZJOOHYHYLMI.data new file mode 100644 index 00000000..aacafb9f Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/LM/CIQIFHHL6JGE5AT5CX66FZUHQKFZ3F5J4HDTDEDSFIFRZJOOHYHYLMI.data differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/LO/CIQJLKDKHMLW3CBIFI6FNN3RDDQJQ37UPBHLBSDE4IODA3OGUYZNLOI.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/LO/CIQJLKDKHMLW3CBIFI6FNN3RDDQJQ37UPBHLBSDE4IODA3OGUYZNLOI.data new file mode 100644 index 00000000..ca141be2 Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/LO/CIQJLKDKHMLW3CBIFI6FNN3RDDQJQ37UPBHLBSDE4IODA3OGUYZNLOI.data differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/LU/CIQKRGVCTXRXFOEYT7SM4CCDVTLJKVZBCKJ2K25QUHDSDC54EBIKLUI.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/LU/CIQKRGVCTXRXFOEYT7SM4CCDVTLJKVZBCKJ2K25QUHDSDC54EBIKLUI.data new file mode 100644 index 00000000..69e8f9e4 --- /dev/null +++ b/packages/ipfs-unixfs-importer/test/test-repo/blocks/LU/CIQKRGVCTXRXFOEYT7SM4CCDVTLJKVZBCKJ2K25QUHDSDC54EBIKLUI.data @@ -0,0 +1,2 @@ + + \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/MJ/CIQJIH4MW534AFHKJ4RJWSADQQKP5JF75FJD5HAG2FNU24Y5GDLNMJA.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/MJ/CIQJIH4MW534AFHKJ4RJWSADQQKP5JF75FJD5HAG2FNU24Y5GDLNMJA.data new file mode 100644 index 00000000..637f391c --- /dev/null +++ 
b/packages/ipfs-unixfs-importer/test/test-repo/blocks/MJ/CIQJIH4MW534AFHKJ4RJWSADQQKP5JF75FJD5HAG2FNU24Y5GDLNMJA.data @@ -0,0 +1,2 @@ + +x\΃ \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/NW/CIQHKHTR6BILKGTUCWOFDAU3EEHTE3TTXRHQU4JOD5RWMJNIKFKCNWA.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/NW/CIQHKHTR6BILKGTUCWOFDAU3EEHTE3TTXRHQU4JOD5RWMJNIKFKCNWA.data new file mode 100644 index 00000000..44403205 --- /dev/null +++ b/packages/ipfs-unixfs-importer/test/test-repo/blocks/NW/CIQHKHTR6BILKGTUCWOFDAU3EEHTE3TTXRHQU4JOD5RWMJNIKFKCNWA.data @@ -0,0 +1,3 @@ + + +x\΃ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/NY/CIQAS5P5V6R6ZWYCMEMIG77GPKPNN3IR55NKZVQ2KFWN35IZWHFVNYI.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/NY/CIQAS5P5V6R6ZWYCMEMIG77GPKPNN3IR55NKZVQ2KFWN35IZWHFVNYI.data new file mode 100644 index 00000000..cbd601a6 Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/NY/CIQAS5P5V6R6ZWYCMEMIG77GPKPNN3IR55NKZVQ2KFWN35IZWHFVNYI.data differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/O6/CIQOYW2THIZBRGI7IN33ROGCKOFZLXJJ2MPKYZBTV4H3N7GYHXMAO6A.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/O6/CIQOYW2THIZBRGI7IN33ROGCKOFZLXJJ2MPKYZBTV4H3N7GYHXMAO6A.data new file mode 100644 index 00000000..7b58d6c8 --- /dev/null +++ b/packages/ipfs-unixfs-importer/test/test-repo/blocks/O6/CIQOYW2THIZBRGI7IN33ROGCKOFZLXJJ2MPKYZBTV4H3N7GYHXMAO6A.data @@ -0,0 +1,3 @@ +/ +" @ԆDgA7directT2 +" ;APY0k}E=p  recursiveT \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/O7/CIQP7DQNED7CUCZXDXMNPJC2VQXADCIZ6KIZ3JF6FZDZYYKJMDCFO7Y.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/O7/CIQP7DQNED7CUCZXDXMNPJC2VQXADCIZ6KIZ3JF6FZDZYYKJMDCFO7Y.data new file mode 100644 index 00000000..46d10573 Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/O7/CIQP7DQNED7CUCZXDXMNPJC2VQXADCIZ6KIZ3JF6FZDZYYKJMDCFO7Y.data differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/OC/CIQC2GOQFKDUPKVP4XNQVV7M5ZJ4MP57IEM3W2WKV6GLUPPIZTZXOCQ.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/OC/CIQC2GOQFKDUPKVP4XNQVV7M5ZJ4MP57IEM3W2WKV6GLUPPIZTZXOCQ.data new file mode 100644 index 00000000..3f5311b7 Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/OC/CIQC2GOQFKDUPKVP4XNQVV7M5ZJ4MP57IEM3W2WKV6GLUPPIZTZXOCQ.data differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/OD/CIQOJATAB4IXQU4GS3N6IKABVBWLIVP5HQQOBQLSENPEZBXAU5WGODY.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/OD/CIQOJATAB4IXQU4GS3N6IKABVBWLIVP5HQQOBQLSENPEZBXAU5WGODY.data new file mode 100644 index 00000000..f0b3a599 Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/OD/CIQOJATAB4IXQU4GS3N6IKABVBWLIVP5HQQOBQLSENPEZBXAU5WGODY.data differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/OY/CIQDEMXSPYNNJT5CP2V6OMAMSK3VAN5F525AUQF4O5SHFSENFK6MOYA.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/OY/CIQDEMXSPYNNJT5CP2V6OMAMSK3VAN5F525AUQF4O5SHFSENFK6MOYA.data new file mode 100644 index 00000000..a3e60c9e Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/OY/CIQDEMXSPYNNJT5CP2V6OMAMSK3VAN5F525AUQF4O5SHFSENFK6MOYA.data differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/P7/CIQDOBRSMVCPS3KLV6TKWC6SUHX5RG5GAWZO4GENXABVJY3S6QTDP7I.data 
b/packages/ipfs-unixfs-importer/test/test-repo/blocks/P7/CIQDOBRSMVCPS3KLV6TKWC6SUHX5RG5GAWZO4GENXABVJY3S6QTDP7I.data new file mode 100644 index 00000000..bb713c56 Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/P7/CIQDOBRSMVCPS3KLV6TKWC6SUHX5RG5GAWZO4GENXABVJY3S6QTDP7I.data differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/PX/CIQPRIAYMCVHLHZ6F4ZK5YTMBUFWS3GJM3NMSWKCSETRGTI5EWJKPXQ.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/PX/CIQPRIAYMCVHLHZ6F4ZK5YTMBUFWS3GJM3NMSWKCSETRGTI5EWJKPXQ.data new file mode 100644 index 00000000..5accb645 --- /dev/null +++ b/packages/ipfs-unixfs-importer/test/test-repo/blocks/PX/CIQPRIAYMCVHLHZ6F4ZK5YTMBUFWS3GJM3NMSWKCSETRGTI5EWJKPXQ.data @@ -0,0 +1,3 @@ +5 +" $G,A4{xZ/.D` 200Bytes.txt + \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/QE/CIQBIGRKU5D2NNT76M7A62F6AVPNDXPC7EBVACIKPANWX3MEZ5FOQEA.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/QE/CIQBIGRKU5D2NNT76M7A62F6AVPNDXPC7EBVACIKPANWX3MEZ5FOQEA.data new file mode 100644 index 00000000..c3a2f685 Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/QE/CIQBIGRKU5D2NNT76M7A62F6AVPNDXPC7EBVACIKPANWX3MEZ5FOQEA.data differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/QF/CIQCIGHH7ZD6OLFCIHXDI65QV7HXRBC2F4XBVG4KUCTQIA2EMAJ7QFY.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/QF/CIQCIGHH7ZD6OLFCIHXDI65QV7HXRBC2F4XBVG4KUCTQIA2EMAJ7QFY.data new file mode 100644 index 00000000..a655cf83 --- /dev/null +++ b/packages/ipfs-unixfs-importer/test/test-repo/blocks/QF/CIQCIGHH7ZD6OLFCIHXDI65QV7HXRBC2F4XBVG4KUCTQIA2EMAJ7QFY.data @@ -0,0 +1,5 @@ + +wxxM{ +DzH/&^ RS/v,R +=Ng~pf1\[>%U1@Q׀2&m6qQ؁]|!K E~J ֕읝ojbn3eT)D+;s +컓:Ty!c3\*T7E?[Pv}A+cx~e \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/QF/CIQGPALRQ24P6NS4OWHTQ7R247ZI7KJWP3QWPQYS43LFULQC5ANLQFI.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/QF/CIQGPALRQ24P6NS4OWHTQ7R247ZI7KJWP3QWPQYS43LFULQC5ANLQFI.data new file mode 100644 index 00000000..a8f98693 Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/QF/CIQGPALRQ24P6NS4OWHTQ7R247ZI7KJWP3QWPQYS43LFULQC5ANLQFI.data differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/QU/CIQHUGO6PZFU3HS5W5Y25STP74OMT5SRRJZXL4LHDRQ3SIM7NCODQUI.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/QU/CIQHUGO6PZFU3HS5W5Y25STP74OMT5SRRJZXL4LHDRQ3SIM7NCODQUI.data new file mode 100644 index 00000000..6d043733 --- /dev/null +++ b/packages/ipfs-unixfs-importer/test/test-repo/blocks/QU/CIQHUGO6PZFU3HS5W5Y25STP74OMT5SRRJZXL4LHDRQ3SIM7NCODQUI.data @@ -0,0 +1,2 @@ + +rː'Q# \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/QV/CIQHXG7QQS5B7FZ3UOMUYOWJZ5KWXCKGGO7MBLHYPX6ZZ5XZK66AQVI.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/QV/CIQHXG7QQS5B7FZ3UOMUYOWJZ5KWXCKGGO7MBLHYPX6ZZ5XZK66AQVI.data new file mode 100644 index 00000000..1524efce Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/QV/CIQHXG7QQS5B7FZ3UOMUYOWJZ5KWXCKGGO7MBLHYPX6ZZ5XZK66AQVI.data differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/QV/CIQOHMGEIKMPYHAUTL57JSEZN64SIJ5OIHSGJG4TJSSJLGI3PBJLQVI.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/QV/CIQOHMGEIKMPYHAUTL57JSEZN64SIJ5OIHSGJG4TJSSJLGI3PBJLQVI.data new file mode 100644 index 00000000..e69de29b diff --git 
a/packages/ipfs-unixfs-importer/test/test-repo/blocks/R3/CIQBED3K6YA5I3QQWLJOCHWXDRK5EXZQILBCKAPEDUJENZ5B5HJ5R3A.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/R3/CIQBED3K6YA5I3QQWLJOCHWXDRK5EXZQILBCKAPEDUJENZ5B5HJ5R3A.data new file mode 100644 index 00000000..389e1117 --- /dev/null +++ b/packages/ipfs-unixfs-importer/test/test-repo/blocks/R3/CIQBED3K6YA5I3QQWLJOCHWXDRK5EXZQILBCKAPEDUJENZ5B5HJ5R3A.data @@ -0,0 +1,28 @@ + +Hello and Welcome to IPFS! + +██╗██████╗ ███████╗███████╗ +██║██╔══██╗██╔════╝██╔════╝ +██║██████╔╝█████╗ ███████╗ +██║██╔═══╝ ██╔══╝ ╚════██║ +██║██║ ██║ ███████║ +╚═╝╚═╝ ╚═╝ ╚══════╝ + +If you're seeing this, you have successfully installed +IPFS and are now interfacing with the ipfs merkledag! + + ------------------------------------------------------- +| Warning: | +| This is alpha software. Use at your own discretion! | +| Much is missing or lacking polish. There are bugs. | +| Not yet secure. Read the security notes for more. | + ------------------------------------------------------- + +Check out some of the other files in this directory: + + ./about + ./help + ./quick-start <-- usage examples + ./readme <-- this file + ./security-notes + \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/RJ/CIQMLF3XEWG6VSOUXDSJ5BG26LKETTMBVMXIW7JKRHRPLIBNCFCYRJI.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/RJ/CIQMLF3XEWG6VSOUXDSJ5BG26LKETTMBVMXIW7JKRHRPLIBNCFCYRJI.data new file mode 100644 index 00000000..5a59204a --- /dev/null +++ b/packages/ipfs-unixfs-importer/test/test-repo/blocks/RJ/CIQMLF3XEWG6VSOUXDSJ5BG26LKETTMBVMXIW7JKRHRPLIBNCFCYRJI.data @@ -0,0 +1,2 @@ + +stem. Some \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/RW/CIQC3G665ZIYGOQYC3MQULKGIBXEOFAZHK46I5UQ3O7EEJQP4FW6RWI.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/RW/CIQC3G665ZIYGOQYC3MQULKGIBXEOFAZHK46I5UQ3O7EEJQP4FW6RWI.data new file mode 100644 index 00000000..1a86e0be Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/RW/CIQC3G665ZIYGOQYC3MQULKGIBXEOFAZHK46I5UQ3O7EEJQP4FW6RWI.data differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/RX/CIQE66Z547DTRXMML2XLVBUPU4PW4HGY3HNOT22D27SWEWL7BM4KRXA.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/RX/CIQE66Z547DTRXMML2XLVBUPU4PW4HGY3HNOT22D27SWEWL7BM4KRXA.data new file mode 100644 index 00000000..74f62a02 --- /dev/null +++ b/packages/ipfs-unixfs-importer/test/test-repo/blocks/RX/CIQE66Z547DTRXMML2XLVBUPU4PW4HGY3HNOT22D27SWEWL7BM4KRXA.data @@ -0,0 +1,3 @@ + + +'Q# diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/S5/CIQHBGZNZRPWVEFNMTLP4OS5EAVHFMCX2HD7FZUC2B3WUU3D4LGKS5A.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/S5/CIQHBGZNZRPWVEFNMTLP4OS5EAVHFMCX2HD7FZUC2B3WUU3D4LGKS5A.data new file mode 100644 index 00000000..3a99c365 --- /dev/null +++ b/packages/ipfs-unixfs-importer/test/test-repo/blocks/S5/CIQHBGZNZRPWVEFNMTLP4OS5EAVHFMCX2HD7FZUC2B3WUU3D4LGKS5A.data @@ -0,0 +1,3 @@ +4 +" Y9_)a˹2RmŖke9 js-ipfs-repo + \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/SHARDING b/packages/ipfs-unixfs-importer/test/test-repo/blocks/SHARDING new file mode 100644 index 00000000..a153331d --- /dev/null +++ b/packages/ipfs-unixfs-importer/test/test-repo/blocks/SHARDING @@ -0,0 +1 @@ +/repo/flatfs/shard/v1/next-to-last/2 diff --git 
a/packages/ipfs-unixfs-importer/test/test-repo/blocks/SQ/CIQABZII22CKQPRFRNJDBZLZDVWDLXB4FB63ZSHKE25TXTZ5PRFNSQQ.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/SQ/CIQABZII22CKQPRFRNJDBZLZDVWDLXB4FB63ZSHKE25TXTZ5PRFNSQQ.data new file mode 100644 index 00000000..38a7ed3a Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/SQ/CIQABZII22CKQPRFRNJDBZLZDVWDLXB4FB63ZSHKE25TXTZ5PRFNSQQ.data differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/SS/CIQJYN3KXS5PFDN3C3UKITZEAQ5E2ZZSIKJ5GLFKYZH5G5GPOPZRSSQ.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/SS/CIQJYN3KXS5PFDN3C3UKITZEAQ5E2ZZSIKJ5GLFKYZH5G5GPOPZRSSQ.data new file mode 100644 index 00000000..562529a2 Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/SS/CIQJYN3KXS5PFDN3C3UKITZEAQ5E2ZZSIKJ5GLFKYZH5G5GPOPZRSSQ.data differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/SY/CIQGL4ZZ3NU3CYC6HCGJL2PB3SCFDLNIMJ2VBGFII2ETFDJKKRWCSYY.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/SY/CIQGL4ZZ3NU3CYC6HCGJL2PB3SCFDLNIMJ2VBGFII2ETFDJKKRWCSYY.data new file mode 100644 index 00000000..dedf499f Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/SY/CIQGL4ZZ3NU3CYC6HCGJL2PB3SCFDLNIMJ2VBGFII2ETFDJKKRWCSYY.data differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/T3/CIQFIELZYM2Q2O27HFMWNO3RQCBUN42MNB4WYEHTCKOBKEOQC4X4T3I.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/T3/CIQFIELZYM2Q2O27HFMWNO3RQCBUN42MNB4WYEHTCKOBKEOQC4X4T3I.data new file mode 100644 index 00000000..9e5174d0 --- /dev/null +++ b/packages/ipfs-unixfs-importer/test/test-repo/blocks/T3/CIQFIELZYM2Q2O27HFMWNO3RQCBUN42MNB4WYEHTCKOBKEOQC4X4T3I.data @@ -0,0 +1,4 @@ +5 +" $G,A4{xZ/.D` 200Bytes.txt/ +" Y9_)a˹2RmŖke9level-2 + \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/T3/CIQHHFPK232RMHO7DXAEDSDEBS2BUY2XK2X3LJTP4SPIM5NYBINUT3Y.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/T3/CIQHHFPK232RMHO7DXAEDSDEBS2BUY2XK2X3LJTP4SPIM5NYBINUT3Y.data new file mode 100644 index 00000000..5a3836e9 Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/T3/CIQHHFPK232RMHO7DXAEDSDEBS2BUY2XK2X3LJTP4SPIM5NYBINUT3Y.data differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/TK/CIQCATTJHTFQJK6QMVRDXHZBQLVTXHRZR2G3R5OU3G7HREQTYK3KTKQ.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/TK/CIQCATTJHTFQJK6QMVRDXHZBQLVTXHRZR2G3R5OU3G7HREQTYK3KTKQ.data new file mode 100644 index 00000000..a4027d46 Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/TK/CIQCATTJHTFQJK6QMVRDXHZBQLVTXHRZR2G3R5OU3G7HREQTYK3KTKQ.data differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/TW/CIQFEAGMNNXXTYKYQSANT6IBNTFN7WR5RPD5F6GN6MBKUUO25DNOTWQ.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/TW/CIQFEAGMNNXXTYKYQSANT6IBNTFN7WR5RPD5F6GN6MBKUUO25DNOTWQ.data new file mode 100644 index 00000000..10aa2ae4 Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/TW/CIQFEAGMNNXXTYKYQSANT6IBNTFN7WR5RPD5F6GN6MBKUUO25DNOTWQ.data differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/TW/CIQMVX3GSIETJNHF3OOH7TMLHB74V6MEEZY5V54Z7JHJV2MUZ7R2TWI.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/TW/CIQMVX3GSIETJNHF3OOH7TMLHB74V6MEEZY5V54Z7JHJV2MUZ7R2TWI.data new file mode 100644 index 00000000..c1f9899a Binary files /dev/null and 
b/packages/ipfs-unixfs-importer/test/test-repo/blocks/TW/CIQMVX3GSIETJNHF3OOH7TMLHB74V6MEEZY5V54Z7JHJV2MUZ7R2TWI.data differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/U2/CIQBQTRBKKRZNSXRN5A2DTCL5QYIC75SVOKTZJSFYV7IHLCDDT6IU2Q.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/U2/CIQBQTRBKKRZNSXRN5A2DTCL5QYIC75SVOKTZJSFYV7IHLCDDT6IU2Q.data new file mode 100644 index 00000000..4e910622 Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/U2/CIQBQTRBKKRZNSXRN5A2DTCL5QYIC75SVOKTZJSFYV7IHLCDDT6IU2Q.data differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/UI/CIQIKOTOTJQ2NFB4A3MMLHKZBTNOIK3QAZ3XSCASZZUPD2ZDRHTOUIY.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/UI/CIQIKOTOTJQ2NFB4A3MMLHKZBTNOIK3QAZ3XSCASZZUPD2ZDRHTOUIY.data new file mode 100644 index 00000000..871a6bf0 --- /dev/null +++ b/packages/ipfs-unixfs-importer/test/test-repo/blocks/UI/CIQIKOTOTJQ2NFB4A3MMLHKZBTNOIK3QAZ3XSCASZZUPD2ZDRHTOUIY.data @@ -0,0 +1,4729 @@ + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
There have been many attempts at constructing a global distributed file system. Some systems have seen significant success, and others failed completely. Among the academic attempts, AFS [6] has succeeded widely and is still in use today. Others [7, ?] have not attained the same success. Outside of academia, the most successful systems have been peer-to-peer file-sharing applications primarily geared toward large media (audio and video). Most notably, Napster, KaZaA, and BitTorrent [2] deployed large file distribution systems supporting over 100 million simultaneous users. Even today, BitTorrent maintains a massive deployment where tens of millions of nodes churn daily [16]. These applications saw greater numbers of users and files distributed than their academic file system counterparts. However, the applications were not designed as infrastructure to be built upon. While there have been successful repurposings¹, no general file system has emerged that offers global, low-latency, and decentralized distribution.

¹For example, Linux distributions use BitTorrent to transmit disk images, and Blizzard, Inc. uses it to distribute video game content.

Perhaps this is because a “good enough” system for most use cases already exists: HTTP. By far, HTTP is the most successful “distributed system of files” ever deployed. Coupled with the browser, HTTP has had enormous technical and social impact. It has become the de facto way to transmit files across the internet. Yet, it fails to take advantage of dozens of brilliant file distribution techniques invented in the last fifteen years. From one perspective, evolving Web infrastructure is near-impossible, given the number of backwards-compatibility constraints and the number of strong parties invested in the current model. But from another perspective, new protocols have emerged and gained wide use since the emergence of HTTP. What is lacking is upgrading design: enhancing the current HTTP web and introducing new functionality without degrading user experience.

Industry has gotten away with using HTTP this long because moving small files around is relatively cheap, even for small organizations with lots of traffic. But we are entering a new era of data distribution with new challenges: (a) hosting and distributing petabyte datasets, (b) computing on large data across organizations, (c) high-volume, high-definition on-demand or real-time media streams, (d) versioning and linking of massive datasets, (e) preventing accidental disappearance of important files, and more. Many of these can be boiled down to “lots of data, accessible everywhere.” Pressed by critical features and bandwidth concerns, we have already given up HTTP for different data distribution protocols. The next step is making them part of the Web itself.

Orthogonal to efficient data distribution, version control systems have managed to develop important data collaboration workflows. Git, the distributed source code version control system, developed many useful ways to model and implement distributed data operations. The Git toolchain offers versatile versioning functionality that large file distribution systems severely lack. New solutions inspired by Git are emerging, such as Camlistore [?], a personal file storage system, and Dat [?], a data collaboration toolchain and dataset package manager. Git has already influenced distributed filesystem design [9], as its content-addressed Merkle DAG data model enables powerful file distribution strategies. What remains to be explored is how this data structure can influence the design of high-throughput-oriented file systems, and how it might upgrade the Web itself.

This paper introduces IPFS, a novel peer-to-peer version-controlled filesystem seeking to reconcile these issues. IPFS synthesizes learnings from many past successful systems. Careful interface-focused integration yields a system greater than the sum of its parts. The central IPFS principle is modeling all data as part of the same Merkle DAG.
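To make the content-addressed Merkle DAG idea concrete, here is a minimal, illustrative sketch in Node.js. It is not IPFS's actual object format: real IPFS objects use multihashes, CIDs, and dag-pb/UnixFS encodings, whereas the `hashBlock` and `createNode` helpers below are hypothetical and use plain SHA-256 hex digests over a JSON serialization, purely to show how linking children by content hash makes a parent node commit to its entire subgraph.

```js
'use strict'

// Conceptual sketch only: content addressing and Merkle-DAG linking
// with Node.js built-ins. The helpers and the JSON/hex encoding are
// simplifications for illustration, not the IPFS wire format.
const crypto = require('crypto')

// Address a block of bytes by the hash of its content.
function hashBlock (bytes) {
  return crypto.createHash('sha256').update(bytes).digest('hex')
}

// A DAG node carries opaque data plus links to children by hash.
// Because each link is a hash of the child's serialized form, the
// parent's own hash commits to the whole subgraph beneath it.
function createNode (data, links = []) {
  const node = { data, links }
  const serialized = Buffer.from(JSON.stringify(node))
  return { hash: hashBlock(serialized), node, serialized }
}

// Two leaf blocks...
const leafA = createNode('hello ')
const leafB = createNode('world\n')

// ...and a parent "file" node that references them by content hash.
const file = createNode(null, [leafA.hash, leafB.hash])

console.log(file.hash) // changes if any byte in either leaf changes
```

Because every link is a hash of the linked object's bytes, identical subtrees deduplicate automatically and any mutation yields a new root hash, which is the property the file distribution strategies discussed above rely on.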
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. +There have been many attempts at constructing a global +distributed file sy \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/UN/CIQJXEREB4VRPZXUPP4EUUMABGQ4FLCIV7RFAMWV6VTYHGGTHMQOUNA.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/UN/CIQJXEREB4VRPZXUPP4EUUMABGQ4FLCIV7RFAMWV6VTYHGGTHMQOUNA.data new file mode 100644 index 00000000..a6e00f34 Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/UN/CIQJXEREB4VRPZXUPP4EUUMABGQ4FLCIV7RFAMWV6VTYHGGTHMQOUNA.data differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/UN/CIQOMBKARLB7PAITVSNH7VEGIQJRPL6J7FT2XYVKAXT4MQPXXPUYUNY.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/UN/CIQOMBKARLB7PAITVSNH7VEGIQJRPL6J7FT2XYVKAXT4MQPXXPUYUNY.data new file mode 100644 index 00000000..b6539897 Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/UN/CIQOMBKARLB7PAITVSNH7VEGIQJRPL6J7FT2XYVKAXT4MQPXXPUYUNY.data differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/VD/CIQM57VXK2GGRETV46PZJAGXIPIE5O6JRUKAV7BBCJWARIYFH3ZEVDY.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/VD/CIQM57VXK2GGRETV46PZJAGXIPIE5O6JRUKAV7BBCJWARIYFH3ZEVDY.data new file mode 100644 index 00000000..6b72d373 --- /dev/null +++ b/packages/ipfs-unixfs-importer/test/test-repo/blocks/VD/CIQM57VXK2GGRETV46PZJAGXIPIE5O6JRUKAV7BBCJWARIYFH3ZEVDY.data @@ -0,0 +1,2 @@ + +u r[ \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/VJ/CIQCHY6PCZKAFELCCPFO7GDQ6JVXLCA47BWB47DSAT5DLNKZC4BBVJY.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/VJ/CIQCHY6PCZKAFELCCPFO7GDQ6JVXLCA47BWB47DSAT5DLNKZC4BBVJY.data new file mode 100644 index 00000000..9cda061b Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/VJ/CIQCHY6PCZKAFELCCPFO7GDQ6JVXLCA47BWB47DSAT5DLNKZC4BBVJY.data differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/VK/CIQECT7TVGIFJGMWNC2ZGSXRYQXFPA4ZVFEFYTIZ5CZLMW5F3VZDVKY.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/VK/CIQECT7TVGIFJGMWNC2ZGSXRYQXFPA4ZVFEFYTIZ5CZLMW5F3VZDVKY.data new file mode 100644 index 00000000..7f2f4e92 Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/VK/CIQECT7TVGIFJGMWNC2ZGSXRYQXFPA4ZVFEFYTIZ5CZLMW5F3VZDVKY.data differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/VO/CIQGFTQ7FSI2COUXWWLOQ45VUM2GUZCGAXLWCTOKKPGTUWPXHBNIVOY.data 
b/packages/ipfs-unixfs-importer/test/test-repo/blocks/VO/CIQGFTQ7FSI2COUXWWLOQ45VUM2GUZCGAXLWCTOKKPGTUWPXHBNIVOY.data new file mode 100644 index 00000000..2dd80560 --- /dev/null +++ b/packages/ipfs-unixfs-importer/test/test-repo/blocks/VO/CIQGFTQ7FSI2COUXWWLOQ45VUM2GUZCGAXLWCTOKKPGTUWPXHBNIVOY.data @@ -0,0 +1,114 @@ + +  # 0.1 - Quick Start + +This is a set of short examples with minimal explanation. It is meant as +a "quick start". Soon, we'll write a longer tour :-) + + +Add a file to ipfs: + + echo "hello world" >hello + ipfs add hello + + +View it: + + ipfs cat + + +Try a directory: + + mkdir foo + mkdir foo/bar + echo "baz" > foo/baz + echo "baz" > foo/bar/baz + ipfs add -r foo + + +View things: + + ipfs ls + ipfs ls /bar + ipfs cat /baz + ipfs cat /bar/baz + ipfs cat /bar + ipfs ls /baz + + +References: + + ipfs refs + ipfs refs -r + ipfs refs --help + + +Get: + + ipfs get foo2 + diff foo foo2 + + +Objects: + + ipfs object get + ipfs object get /foo2 + ipfs object --help + + +Pin + GC: + + ipfs pin -r + ipfs gc + ipfs ls + ipfs unpin -r + ipfs gc + + +Daemon: + + ipfs daemon (in another terminal) + ipfs id + + +Network: + + (must be online) + ipfs swarm peers + ipfs id + ipfs cat + + +Mount: + + (warning: fuse is finicky!) + ipfs mount + cd /ipfs/< + + +Tool: + + ipfs version + ipfs update + ipfs commands + ipfs config --help + open http://localhost:5001/webui + + +Browse: + + webui: + + http://localhost:5001/webui + + video: + + http://localhost:8080/ipfs/QmVc6zuAneKJzicnJpfrqCH9gSy6bz54JhcypfJYhGUFQu/play#/ipfs/QmTKZgRNwDNZwHtJSjCp6r5FYefzpULfy37JvMt9DwvXse + + images: + + http://localhost:8080/ipfs/QmZpc3HvfjEXvLWGQPWbHk3AjD5j8NEN4gmFN8Jmrd5g83/cs + + markdown renderer app: + + http://localhost:8080/ipfs/QmX7M9CiYXjVeFnkfVGf3y5ixTZ2ACeSGyL1vBJY1HvQPp/mdown + \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/VR/CIQDWXASKC6E6M5YUHWLFE3D5ORIQALUCDXFGAGNWUDBLMHCNE7NVRQ.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/VR/CIQDWXASKC6E6M5YUHWLFE3D5ORIQALUCDXFGAGNWUDBLMHCNE7NVRQ.data new file mode 100644 index 00000000..64ce0aeb Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/VR/CIQDWXASKC6E6M5YUHWLFE3D5ORIQALUCDXFGAGNWUDBLMHCNE7NVRQ.data differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/W4/CIQCZN3PHPV7FIQRZ2RMICUTK4IM4CCOKKJ45QAY2MK34ECEPNVWW4I.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/W4/CIQCZN3PHPV7FIQRZ2RMICUTK4IM4CCOKKJ45QAY2MK34ECEPNVWW4I.data new file mode 100644 index 00000000..81663143 Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/W4/CIQCZN3PHPV7FIQRZ2RMICUTK4IM4CCOKKJ45QAY2MK34ECEPNVWW4I.data differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/WH/CIQOEE6HDAGCAUN6YNJ2WMWJUZ6PTIZPGPCLKYGPGTIRX5IMJNXZWHQ.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/WH/CIQOEE6HDAGCAUN6YNJ2WMWJUZ6PTIZPGPCLKYGPGTIRX5IMJNXZWHQ.data new file mode 100644 index 00000000..b75d8023 Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/WH/CIQOEE6HDAGCAUN6YNJ2WMWJUZ6PTIZPGPCLKYGPGTIRX5IMJNXZWHQ.data differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/X3/CIQFTFEEHEDF6KLBT32BFAGLXEZL4UWFNWM4LFTLMXQBCERZ6CMLX3Y.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/X3/CIQFTFEEHEDF6KLBT32BFAGLXEZL4UWFNWM4LFTLMXQBCERZ6CMLX3Y.data new file mode 100644 index 00000000..9553a942 --- /dev/null +++ 
b/packages/ipfs-unixfs-importer/test/test-repo/blocks/X3/CIQFTFEEHEDF6KLBT32BFAGLXEZL4UWFNWM4LFTLMXQBCERZ6CMLX3Y.data @@ -0,0 +1,2 @@ + + \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/XL/CIQAARXKSMRHHPDJX4RMZVL2FA3652JYS3PGJZYATFZEVYZTYZ7OXLQ.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/XL/CIQAARXKSMRHHPDJX4RMZVL2FA3652JYS3PGJZYATFZEVYZTYZ7OXLQ.data new file mode 100644 index 00000000..e80dbd9a Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/XL/CIQAARXKSMRHHPDJX4RMZVL2FA3652JYS3PGJZYATFZEVYZTYZ7OXLQ.data differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/XO/CIQJGO2B2N75IUEM372FSMG76VV256I4PXBULZZ5ASNLK4FL4EG7XOI.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/XO/CIQJGO2B2N75IUEM372FSMG76VV256I4PXBULZZ5ASNLK4FL4EG7XOI.data new file mode 100644 index 00000000..d899663b Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/XO/CIQJGO2B2N75IUEM372FSMG76VV256I4PXBULZZ5ASNLK4FL4EG7XOI.data differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/Y5/CIQDNPSZDJIJ4OGB34IJIMF4NVVGGYKXGXX4DKZ3GF5O4XQ5TCEKY5I.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/Y5/CIQDNPSZDJIJ4OGB34IJIMF4NVVGGYKXGXX4DKZ3GF5O4XQ5TCEKY5I.data new file mode 100644 index 00000000..ba0caf40 Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/Y5/CIQDNPSZDJIJ4OGB34IJIMF4NVVGGYKXGXX4DKZ3GF5O4XQ5TCEKY5I.data differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/YE/CIQAHHAIILXU6ZJ3QZRQJFXG22DLMMTR3ZMBZ3P3DXUEXXVG6UCOYEQ.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/YE/CIQAHHAIILXU6ZJ3QZRQJFXG22DLMMTR3ZMBZ3P3DXUEXXVG6UCOYEQ.data new file mode 100644 index 00000000..1d48c015 Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/YE/CIQAHHAIILXU6ZJ3QZRQJFXG22DLMMTR3ZMBZ3P3DXUEXXVG6UCOYEQ.data differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/YI/CIQN7OB5A4BJDFSWOWU5P2PHGINZFYFOW4SGA22CWXPWRHTLKR6MYII.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/YI/CIQN7OB5A4BJDFSWOWU5P2PHGINZFYFOW4SGA22CWXPWRHTLKR6MYII.data new file mode 100644 index 00000000..b1df8c51 Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/YI/CIQN7OB5A4BJDFSWOWU5P2PHGINZFYFOW4SGA22CWXPWRHTLKR6MYII.data differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/Z3/CIQHZ35U3FQWXNWA5EK5UUB7WWOAO6DB6OS3PKO65SS674P4ZTJ4Z3A.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/Z3/CIQHZ35U3FQWXNWA5EK5UUB7WWOAO6DB6OS3PKO65SS674P4ZTJ4Z3A.data new file mode 100644 index 00000000..b0ac590e Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/Z3/CIQHZ35U3FQWXNWA5EK5UUB7WWOAO6DB6OS3PKO65SS674P4ZTJ4Z3A.data differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/ZE/CIQAWCSZTDXZMMUBJ4ZT5AP4QK7NG2ICSLINABMIGOV3FL2GV5NBZEI.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/ZE/CIQAWCSZTDXZMMUBJ4ZT5AP4QK7NG2ICSLINABMIGOV3FL2GV5NBZEI.data new file mode 100644 index 00000000..3b40300d Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/ZE/CIQAWCSZTDXZMMUBJ4ZT5AP4QK7NG2ICSLINABMIGOV3FL2GV5NBZEI.data differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/ZJ/CIQAC3JCY6ILJZIESXPWN37WGYJHMRBO75J3KRL47DPFYLDHEYZFZJY.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/ZJ/CIQAC3JCY6ILJZIESXPWN37WGYJHMRBO75J3KRL47DPFYLDHEYZFZJY.data new file mode 100644 index 
00000000..819ec6cf Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/ZJ/CIQAC3JCY6ILJZIESXPWN37WGYJHMRBO75J3KRL47DPFYLDHEYZFZJY.data differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/ZP/CIQK5Z2VYLPEM4X75GWQP23OV737H23CYXARTYUHX2LD73PQPQMTZPY.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/ZP/CIQK5Z2VYLPEM4X75GWQP23OV737H23CYXARTYUHX2LD73PQPQMTZPY.data new file mode 100644 index 00000000..c57d7186 Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/ZP/CIQK5Z2VYLPEM4X75GWQP23OV737H23CYXARTYUHX2LD73PQPQMTZPY.data differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/_README b/packages/ipfs-unixfs-importer/test/test-repo/blocks/_README new file mode 100644 index 00000000..23cb0909 --- /dev/null +++ b/packages/ipfs-unixfs-importer/test/test-repo/blocks/_README @@ -0,0 +1,30 @@ +This is a repository of IPLD objects. Each IPLD object is in a single file, +named <base32 encoding of CID>.data, where <base32 encoding of CID> is the +"base32" encoding of the CID (as specified in +https://github.com/multiformats/multibase) without the 'B' prefix. +All the object files are placed in a tree of directories, based on a +function of the CID. This is a form of sharding similar to +the objects directory in git repositories. Previously we used +prefixes; we now use the next-to-last two characters. + + func NextToLast(base32cid string) string { + nextToLastLen := 2 + offset := len(base32cid) - nextToLastLen - 1 + return base32cid[offset : offset+nextToLastLen] + } + +For example, an object with a base58 CIDv1 of + + zb2rhYSxw4ZjuzgCnWSt19Q94ERaeFhu9uSqRgjSdx9bsgM6f + +has a base32 CIDv1 of + + BAFKREIA22FLID5AJ2KU7URG47MDLROZIH6YF2KALU2PWEFPVI37YLKRSCA + +and will be placed at + + SC/AFKREIA22FLID5AJ2KU7URG47MDLROZIH6YF2KALU2PWEFPVI37YLKRSCA.data + +with 'SC' being the next-to-last two characters and the 'B' at the +beginning of the CIDv1 string being the multibase prefix, which is not +stored in the filename.
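For illustration, the same next-to-last-two-characters sharding rule can be written as a small JavaScript sketch. The helper name blockPath is hypothetical and not part of this package; it only mirrors the _README above.

    // Hypothetical helper: map a base32 CIDv1 string to its path under
    // blocks/, following the sharding scheme described in _README above.
    function blockPath (base32Cid) {
      const nextToLastLen = 2
      const offset = base32Cid.length - nextToLastLen - 1
      const dir = base32Cid.slice(offset, offset + nextToLastLen)
      // the leading multibase prefix 'B' is not stored in the filename
      return `${dir}/${base32Cid.slice(1)}.data`
    }

    // blockPath('BAFKREIA22FLID5AJ2KU7URG47MDLROZIH6YF2KALU2PWEFPVI37YLKRSCA')
    // => 'SC/AFKREIA22FLID5AJ2KU7URG47MDLROZIH6YF2KALU2PWEFPVI37YLKRSCA.data'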
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/config b/packages/ipfs-unixfs-importer/test/test-repo/config new file mode 100644 index 00000000..cbcdfe3b --- /dev/null +++ b/packages/ipfs-unixfs-importer/test/test-repo/config @@ -0,0 +1 @@ +{"Identity":{"PeerID":"QmQ2zigjQikYnyYUSXZydNXrDRhBut2mubwJBaLXobMt3A","PrivKey":"CAASpgkwggSiAgEAAoIBAQC2SKo/HMFZeBml1AF3XijzrxrfQXdJzjePBZAbdxqKR1Mc6juRHXij6HXYPjlAk01BhF1S3Ll4Lwi0cAHhggf457sMg55UWyeGKeUv0ucgvCpBwlR5cQ020i0MgzjPWOLWq1rtvSbNcAi2ZEVn6+Q2EcHo3wUvWRtLeKz+DZSZfw2PEDC+DGPJPl7f8g7zl56YymmmzH9liZLNrzg/qidokUv5u1pdGrcpLuPNeTODk0cqKB+OUbuKj9GShYECCEjaybJDl9276oalL9ghBtSeEv20kugatTvYy590wFlJkkvyl+nPxIH0EEYMKK9XRWlu9XYnoSfboiwcv8M3SlsjAgMBAAECggEAZtju/bcKvKFPz0mkHiaJcpycy9STKphorpCT83srBVQi59CdFU6Mj+aL/xt0kCPMVigJw8P3/YCEJ9J+rS8BsoWE+xWUEsJvtXoT7vzPHaAtM3ci1HZd302Mz1+GgS8Epdx+7F5p80XAFLDUnELzOzKftvWGZmWfSeDnslwVONkL/1VAzwKy7Ce6hk4SxRE7l2NE2OklSHOzCGU1f78ZzVYKSnS5Ag9YrGjOAmTOXDbKNKN/qIorAQ1bovzGoCwx3iGIatQKFOxyVCyO1PsJYT7JO+kZbhBWRRE+L7l+ppPER9bdLFxs1t5CrKc078h+wuUr05S1P1JjXk68pk3+kQKBgQDeK8AR11373Mzib6uzpjGzgNRMzdYNuExWjxyxAzz53NAR7zrPHvXvfIqjDScLJ4NcRO2TddhXAfZoOPVH5k4PJHKLBPKuXZpWlookCAyENY7+Pd55S8r+a+MusrMagYNljb5WbVTgN8cgdpim9lbbIFlpN6SZaVjLQL3J8TWH6wKBgQDSChzItkqWX11CNstJ9zJyUE20I7LrpyBJNgG1gtvz3ZMUQCn3PxxHtQzN9n1P0mSSYs+jBKPuoSyYLt1wwe10/lpgL4rkKWU3/m1Myt0tveJ9WcqHh6tzcAbb/fXpUFT/o4SWDimWkPkuCb+8j//2yiXk0a/T2f36zKMuZvujqQKBgC6B7BAQDG2H2B/ijofp12ejJU36nL98gAZyqOfpLJ+FeMz4TlBDQ+phIMhnHXA5UkdDapQ+zA3SrFk+6yGk9Vw4Hf46B+82SvOrSbmnMa+PYqKYIvUzR4gg34rL/7AhwnbEyD5hXq4dHwMNsIDq+l2elPjwm/U9V0gdAl2+r50HAoGALtsKqMvhv8HucAMBPrLikhXP/8um8mMKFMrzfqZ+otxfHzlhI0L08Bo3jQrb0Z7ByNY6M8epOmbCKADsbWcVre/AAY0ZkuSZK/CaOXNX/AhMKmKJh8qAOPRY02LIJRBCpfS4czEdnfUhYV/TYiFNnKRj57PPYZdTzUsxa/yVTmECgYBr7slQEjb5Onn5mZnGDh+72BxLNdgwBkhO0OCdpdISqk0F0Pxby22DFOKXZEpiyI9XYP1C8wPiJsShGm2yEwBPWXnrrZNWczaVuCbXHrZkWQogBDG3HGXNdU4MAWCyiYlyinIBpPpoAJZSzpGLmWbMWh28+RJS6AQX6KHrK1o2uw=="},"Datastore":{"Type":"","Path":"","StorageMax":"","StorageGCWatermark":0,"GCPeriod":"","Params":null,"NoSync":false},"Addresses":{"Swarm":["/ip4/0.0.0.0/tcp/4001","/ip6/::/tcp/4001"],"API":"/ip4/127.0.0.1/tcp/5001","Gateway":"/ip4/127.0.0.1/tcp/8080"},"Mounts":{"IPFS":"/ipfs","IPNS":"/ipns","FuseAllowOther":false},"Version":{"Current":"0.4.0-dev","Check":"error","CheckDate":"0001-01-01T00:00:00Z","CheckPeriod":"172800000000000","AutoUpdate":"minor"},"Discovery":{"MDNS":{"Enabled":true,"Interval":10}},"Ipns":{"RepublishPeriod":"","RecordLifetime":"","ResolveCacheSize":128},"Bootstrap":["/ip4/104.131.131.82/tcp/4001/ipfs/QmaCpDMGvV2BGHeYERUEnRQAwe3N8SzbUtfsmvsqQLuvuJ","/ip4/104.236.176.52/tcp/4001/ipfs/QmSoLnSGccFuZQJzRadHn95W2CrSFmZuTdDWP8HXaHca9z","/ip4/104.236.179.241/tcp/4001/ipfs/QmSoLPppuBtQSGwKDZT2M73ULpjvfd3aZ6ha4oFGL1KrGM","/ip4/162.243.248.213/tcp/4001/ipfs/QmSoLueR4xBeUbY9WZ9xGUUxunbKWcrNFTDAadQJmocnWm","/ip4/128.199.219.111/tcp/4001/ipfs/QmSoLSafTMBsPKadTEgaXctDQVcqN88CNLHXMkTNwMKPnu","/ip4/104.236.76.40/tcp/4001/ipfs/QmSoLV4Bbm51jM9C4gDYZQ9Cy3U6aXMJDAbzgu2fzaDs64","/ip4/178.62.158.247/tcp/4001/ipfs/QmSoLer265NRgSp2LA3dPaeykiS1J6DifTC88f5uVQKNAd","/ip4/178.62.61.185/tcp/4001/ipfs/QmSoLMeWqB7YGVLJN3pNLQpmmEk35v6wYtsMGLzSr5QBU3","/ip4/104.236.151.122/tcp/4001/ipfs/QmSoLju6m7xTh3DuokvT3886QRYqxAzb1kShaanJgW36yx"],"Tour":{"Last":""},"Gateway":{"HTTPHeaders":null,"RootRedirect":"","Writable":false},"SupernodeRouting":{"Servers":["/ip4/104.236.176.52/tcp/4002/ipfs/QmXdb7tWTxdFEQEFgWBqkuYSrZd3mXrC7HxkD4krGNYx2U","/ip4/104.236.179.241/tcp/4002/ipfs/QmVRqViDByUxjUMoPnjurjKvZhaEMFDtK35FJXHAM4Lkj6
","/ip4/104.236.151.122/tcp/4002/ipfs/QmSZwGx8Tn8tmcM4PtDJaMeUQNRhNFdBLVGPzRiNaRJtFH","/ip4/162.243.248.213/tcp/4002/ipfs/QmbHVEEepCi7rn7VL7Exxpd2Ci9NNB6ifvqwhsrbRMgQFP","/ip4/128.199.219.111/tcp/4002/ipfs/Qmb3brdCYmKG1ycwqCbo6LUwWxTuo3FisnJV2yir7oN92R","/ip4/104.236.76.40/tcp/4002/ipfs/QmdRBCV8Cz2dGhoKLkD3YjPwVFECmqADQkx5ZteF2c6Fy4","/ip4/178.62.158.247/tcp/4002/ipfs/QmUdiMPci7YoEUBkyFZAh2pAbjqcPr7LezyiPD2artLw3v","/ip4/178.62.61.185/tcp/4002/ipfs/QmVw6fGNqBixZE4bewRLT2VXX7fAHUHs8JyidDiJ1P7RUN"]},"API":{"HTTPHeaders":null},"Swarm":{"AddrFilters":null},"Log":{"MaxSizeMB":250,"MaxBackups":1,"MaxAgeDays":0}} \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/test-repo/datastore/000002.ldb b/packages/ipfs-unixfs-importer/test/test-repo/datastore/000002.ldb new file mode 100644 index 00000000..fc04d660 Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/datastore/000002.ldb differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/datastore/000005.ldb b/packages/ipfs-unixfs-importer/test/test-repo/datastore/000005.ldb new file mode 100644 index 00000000..63d9d260 Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/datastore/000005.ldb differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/datastore/CURRENT b/packages/ipfs-unixfs-importer/test/test-repo/datastore/CURRENT new file mode 100644 index 00000000..5b540107 --- /dev/null +++ b/packages/ipfs-unixfs-importer/test/test-repo/datastore/CURRENT @@ -0,0 +1 @@ +MANIFEST-000011 diff --git a/packages/ipfs-unixfs-importer/test/test-repo/datastore/LOCK b/packages/ipfs-unixfs-importer/test/test-repo/datastore/LOCK new file mode 100644 index 00000000..e69de29b diff --git a/packages/ipfs-unixfs-importer/test/test-repo/datastore/LOG b/packages/ipfs-unixfs-importer/test/test-repo/datastore/LOG new file mode 100644 index 00000000..fb2ef830 --- /dev/null +++ b/packages/ipfs-unixfs-importer/test/test-repo/datastore/LOG @@ -0,0 +1,5 @@ +=============== Aug 19, 2016 (CEST) =============== +15:48:10.633634 log@legend F·NumFile S·FileSize N·Entry C·BadEntry B·BadBlock Ke·KeyError D·DroppedEntry L·Level Q·SeqNum T·TimeElapsed +15:48:10.634191 db@open opening +15:48:10.639318 db@janitor F·4 G·0 +15:48:10.639379 db@open done T·5.16729ms diff --git a/packages/ipfs-unixfs-importer/test/test-repo/datastore/LOG.old b/packages/ipfs-unixfs-importer/test/test-repo/datastore/LOG.old new file mode 100644 index 00000000..f5ffd612 --- /dev/null +++ b/packages/ipfs-unixfs-importer/test/test-repo/datastore/LOG.old @@ -0,0 +1,7 @@ +=============== Apr 22, 2016 (WEST) =============== +03:16:42.272495 log@legend F·NumFile S·FileSize N·Entry C·BadEntry B·BadBlock Ke·KeyError D·DroppedEntry L·Level Q·SeqNum T·TimeElapsed +03:16:42.272857 db@open opening +03:16:42.275673 db@janitor F·4 G·0 +03:16:42.275700 db@open done T·2.831108ms +03:16:42.596938 db@close closing +03:16:42.597082 db@close done T·139.194µs diff --git a/packages/ipfs-unixfs-importer/test/test-repo/datastore/MANIFEST-000011 b/packages/ipfs-unixfs-importer/test/test-repo/datastore/MANIFEST-000011 new file mode 100644 index 00000000..7af87ca8 Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/datastore/MANIFEST-000011 differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/version b/packages/ipfs-unixfs-importer/test/test-repo/version new file mode 100644 index 00000000..1e8b3149 --- /dev/null +++ b/packages/ipfs-unixfs-importer/test/test-repo/version @@ -0,0 +1 @@ +6