Skip to content

Commit

Permalink
Merge pull request #103 from oxinabox/ox/fetchmethmore
Browse files Browse the repository at this point in the history
make default fetch fallback to Base.download
  • Loading branch information
oxinabox authored Sep 18, 2019
2 parents 936f315 + 7c28a23 commit c82892a
Show file tree
Hide file tree
Showing 8 changed files with 113 additions and 66 deletions.
4 changes: 2 additions & 2 deletions Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "DataDeps"
uuid = "124859b0-ceae-595e-8997-d05f6a7a8dfe"
authors = ["Lyndon White <oxinabox@ucc.asn.au>"]
version = "0.6.5"
version = "0.7.0"

[deps]
HTTP = "cd3eb016-35fb-5094-929b-558a96fad6f3"
Expand All @@ -11,7 +11,7 @@ SHA = "ea8e919c-243c-51af-8825-aaa63cd721ce"
[compat]
ExpectationStubs = "0.2.0"
HTTP = "0.8.5"
julia = "0.7, 1"
julia = "1"

[extras]
DelimitedFiles = "8bb1440f-4735-579b-a4ab-409b98df4dab"
Expand Down
5 changes: 3 additions & 2 deletions docs/src/z20-for-pkg-devs.md
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ register(DataDep(
remote_path::Union{String,Vector{String}...},
[checksum::Union{String,Vector{String}...},]; # Optional, if not provided will generate
# keyword args (Optional):
fetch_method=http_download # (remote_filepath, local_directory_path)->local_filepath
fetch_method=fetch_default # (remote_filepath, local_directory_path)->local_filepath
post_fetch_method=identity # (local_filepath)->Any
))
```
Expand All @@ -126,8 +126,9 @@ register(DataDep(
- Can take a vector of checksums, being one for each file, or a single checksum in which case the per file hashes are `xor`ed to get the target hash. (See [Recursive Structure](@ref))


- `fetch_method=http_download` a function to run to download the files.
- `fetch_method=fetch_default` a function to run to download the files.
- Function should take 2 parameters `(remote_filepath, local_directorypath)`, and can must return the local filepath to the file downloaded
- Default (`fetch_default`) can correctly handle strings containing HTTP[S] URLs, or any `remote_path` type which overloads `Base.basename` and `Base.download`, e.g. [`AWSS3.S3Path`](https://github.com/JuliaCloud/AWSS3.jl/).
- Can take a vector of methods, being one for each file, or a single method, in which case that method is used to download all of them. (See [Recursive Structure](@ref) below)
- Overloading this lets you change things about how the download is done -- the transport protocol.
- The default is suitable for HTTP[/S], without auth. Modifying it can add authentication or an entirely different protocol (e.g. git, google drive etc)
Expand Down
51 changes: 40 additions & 11 deletions src/fetch_helpers.jl
Original file line number Diff line number Diff line change
Expand Up @@ -14,18 +14,50 @@ function progress_update_period()
# Running in a script, probably want minimal updates
"Inf"
else # default
if haskey(ENV, "DATADEP_PROGRESS_UPDATE_PERIOD")
@warn "\"DATADEP_PROGRESS_UPDATE_PERIOD\" is deprecated, use \"DATADEPS_PROGRESS_UPDATE_PERIOD\" instead."
get(ENV, "DATADEP_PROGRESS_UPDATE_PERIOD", "5")
else
"5" # seconds
end
"5" # seconds
end
end
parse(Float32, envvar)
parse(Float32, envvar)
end

"""
fetch_default(remote_path, local_path)
The default fetch method.
It tries to be a little bit smart to work with things other than just HTTP.
See also [`fetch_base`](@ref) and [`fetch_http`](@ref).
"""
function fetch_default(remotepath, localdir)
if remotepath isa AbstractString && occursin(r"^https?://", remotepath)
# It is HTTP, use good HTTP method, that gets filename by HTTP rules
return fetch_http(remotepath, localdir)
else
# More generic fallback, hopefully `Base.basename`
return fetch_base(remotepath, localdir)
end
end


"""
fetch_base(remote_path, local_dir)
Download from `remote_path` to `local_dir`, via `Base` mechanisms.
The download is performed using `Base.download`
and `Base.basename(remote_path)` is used to determine the filename.
This is very limitted in the case of HTTP as the filename is not always encoded in the URL.
But it does work for simple paths like `"http://myserver/files/data.csv"`.
In general for those cases prefer `http_download`.
The more important feature is that this works for anything that has overloaded
`Base.basename` and `Base.download`, e.g. [`AWSS3.S3Path`](https://github.com/JuliaCloud/AWSS3.jl).
While this doesn't work for all transport mechanisms (so some datadeps will still a custom `fetch_method`),
it works for many.
"""
function fetch_base(remote_path, local_dir)
localpath = joinpath(local_dir, basename(remote_path))
return Base.download(remote_path, localpath)
return string(localpath)
end


"""
Expand All @@ -45,8 +77,5 @@ By default it is once per second, though this depends on configuration
"""
function fetch_http(remotepath, localdir; update_period=progress_update_period())
@assert(isdir(localdir))
HTTP.download(remotepath, localdir; update_period=update_period)
return HTTP.download(remotepath, localdir; update_period=update_period)
end



12 changes: 4 additions & 8 deletions src/resolution_automatic.jl
Original file line number Diff line number Diff line change
Expand Up @@ -156,14 +156,10 @@ end
"""
accept_terms(datadep, localpath, remotepath, i_accept_the_terms_of_use)
Ensurses the user accepts the terms of use; otherwise errors out.
Ensures the user accepts the terms of use; otherwise errors out.
"""
function accept_terms(datadep::DataDep, localpath, remotepath, ::Nothing)
if haskey(ENV, "DATADEPS_ALWAY_ACCEPT")
@warn("Environment variable \$DATADEPS_ALWAY_ACCEPT is deprecated. " *
"Please use \$DATADEPS_ALWAYS_ACCEPT instead.")
end
if !(env_bool("DATADEPS_ALWAYS_ACCEPT") || env_bool("DATADEPS_ALWAY_ACCEPT"))
if !env_bool("DATADEPS_ALWAYS_ACCEPT")
response = check_if_accept_terms(datadep, localpath, remotepath)
accept_terms(datadep, localpath, remotepath, response)
else
Expand All @@ -172,14 +168,14 @@ function accept_terms(datadep::DataDep, localpath, remotepath, ::Nothing)
end
function accept_terms(datadep::DataDep, localpath, remotepath, i_accept_the_terms_of_use::Bool)
if !i_accept_the_terms_of_use
abort("User declined to download $(datadep.name). Can not proceed without the data.")
abort("User declined to download $(datadep.name). Cannot proceed without the data.")
end
true
end

function check_if_accept_terms(datadep::DataDep, localpath, remotepath)
println("This program has requested access to the data dependency $(datadep.name).")
println("which is not currently installed. It can be installed automatically, and you will not see this message again.")
println("\n",datadep.extra_message,"\n\n")
println("\n", datadep.extra_message, "\n\n")
input_bool("Do you want to download the dataset from $remotepath to \"$localpath\"?")
end
10 changes: 5 additions & 5 deletions src/types.jl
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ DataDep(
remote_path::Union{String,Vector{String}...},
[checksum::Union{String,Vector{String}...},]; # Optional, if not provided will generate
# keyword args (Optional):
fetch_method=fetch_http # (remote_filepath, local_directory)->local_filepath
fetch_method=fetch_default # (remote_filepath, local_directory)->local_filepath
post_fetch_method=identity # (local_filepath)->Any
)
```
Expand Down Expand Up @@ -62,13 +62,13 @@ DataDep(
- Can take a vector of checksums, being one for each file, or a single checksum in which case the per file hashes are `xor`ed to get the target hash. (See [Recursive Structure](Recursive Structure) below)
- `fetch_method=fetch_http` a function to run to download the files.
- `fetch_method=fetch_default` a function to run to download the files.
- Function should take 2 parameters (remotepath, local_directory), and must return a local filepath
- It is responsible for determining what the local filename should be
- Change this to change the transfer protocol, for example to use an auth'ed connection.
- Default `fetch_http` is a wrapper around `Base.download` which invokes commandline download tools.
- Default `fetch_default` which fully supports HTTP, and has fallbacks to support any type which overloads `Base.basename` and `Base.download` (see [`fetch_base`](@ref))
- Can take a vector of methods, being one for each file, or a single method, in which case that method is used to download all of them. (See [Recursive Structure](Recursive Structure) below)
- Very few people will need to override this if they are just downloading public HTTP files.
- Very few people will need to override this if they are just downloading public HTTP files.
- `post_fetch_method` a function to run after the files have download
- Should take the local filepath as its first and only argument. Can return anything.
Expand All @@ -83,7 +83,7 @@ DataDep(
function DataDep(name::String,
message::String,
remotepath, hash=nothing;
fetch_method=fetch_http,
fetch_method=fetch_default,
post_fetch_method=identity)

DataDep(name, remotepath, hash, fetch_method, post_fetch_method, message)
Expand Down
1 change: 0 additions & 1 deletion test/REQUIRE

This file was deleted.

19 changes: 19 additions & 0 deletions test/fetch_helpers.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
using Test
using DataDeps: fetch_default, fetch_base, fetch_http

ENV["DATADEPS_ALWAYS_ACCEPT"] = true

@testset "easy https url" begin
url = "https://www.angio.net/pi/digits/10000.txt"
# This is easy because the filename is in the URL
# So it works with both `fetch_base` and `fetch_http`
# HTTP.jl has tests for much more difficult cases, and fetch_http supports those
@testset "$fetch_func" for fetch_func in (fetch_default, fetch_base, fetch_http)
mktempdir() do localdir
localpath = fetch_func(url, localdir)
@test isfile(localpath)
@test localpath == joinpath(localdir, "10000.txt")
@test stat(localpath).size == 10_001
end
end
end
77 changes: 40 additions & 37 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2,50 +2,53 @@ using Test

@info "Notice: These tests check paths some of which are supposed to throw warnings. Others of which are not. Some attention should be paid when reading the output."

tests = [
"util",
"locations",
"main",
"preupload"
]

for filename in tests
@testset "$filename" begin
include(filename * ".jl")
@testset "DataDeps.jl" begin
tests = [
"util",
"locations",
"main",
"preupload",
"fetch_helpers",
]
@testset "tests" begin
for filename in tests
@testset "$filename" begin
include(filename * ".jl")
end
end
end
end

examples = [
"examples.jl",
"examples_manual.jl",
]

examples = [
"examples.jl",
"examples_manual.jl",
]

@testset "examples" for fn in examples
@testset "$fn" begin
tempdir = mktempdir()
try
@info("sending all datadeps to $tempdir")
withenv("DATADEPS_LOAD_PATH"=>tempdir,
"DATADEPS_NO_STANDARD_LOADPATH"=>true) do
@testset "download and use" begin
include(fn)
end
withenv("DATADEPS_DISABLE_DOWNLOAD"=>"true") do
@testset "use already downloaded" begin
@testset "examples" for fn in examples
@testset "$fn" begin
tempdir = mktempdir()
try
@info("sending all datadeps to $tempdir")
withenv("DATADEPS_LOAD_PATH"=>tempdir,
"DATADEPS_NO_STANDARD_LOADPATH"=>true) do
@testset "download and use" begin
include(fn)
end
withenv("DATADEPS_DISABLE_DOWNLOAD"=>"true") do
@testset "use already downloaded" begin
include(fn)
end
end
end
finally
try
@info("removing $tempdir")
cd(@__DIR__) # Ensure not currently in directory being deleted
rm(tempdir, recursive=true, force=true)
catch err
@warn("Something went wrong with removing $tempdir")
@warn(err)
end
end
finally
try
@info("removing $tempdir")
cd(@__DIR__) # Ensure not currently in directory being deleted
rm(tempdir, recursive=true, force=true)
catch err
@warn("Something went wrong with removing $tempdir")
@warn(err)
end
end
end
end

3 comments on commit c82892a

@oxinabox
Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@JuliaRegistrator register()

@oxinabox
Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@JuliaRegistrator register()

@JuliaRegistrator
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Registration pull request created: JuliaRegistries/General/3637

After the above pull request is merged, it is recommended that a tag is created on this repository for the registered package version.

This will be done automatically if Julia TagBot is installed, or can be done manually through the github interface, or via:

git tag -a v0.7.0 -m "<description of version>" c82892ab0b72c1d544aa73b106cabbc73d90ea33
git push origin v0.7.0

Please sign in to comment.