Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Replace HTTP.download #273

Merged
merged 12 commits into from
Sep 17, 2018
Merged
3 changes: 3 additions & 0 deletions src/HTTP.jl
Original file line number Diff line number Diff line change
Expand Up @@ -597,6 +597,9 @@ function stack(;redirect=true,
}}}}}}}}}}}}
end

include("download.jl")


include("Handlers.jl") ;using .Handlers
include("Servers.jl") ;using .Servers; using .Servers: listen
Base.@deprecate_binding(Nitrogen, Servers, false)
Expand Down
142 changes: 142 additions & 0 deletions src/download.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
using .Pairs

"""
    safer_joinpath(basepart, parts...)

A variation on `joinpath` that is more resistant to directory traversal attacks.

Each of the `parts` to be joined (the `basepart` itself is not checked) is rejected
with a `DomainError` if it

- contains `..`,
- begins with a `/`, or
- is otherwise an absolute or drive-qualified path on the current platform
  (for example `\\foo`, `C:\\foo` or `C:foo` on Windows).

Otherwise the result of `joinpath(basepart, parts...)` is returned.
"""
function safer_joinpath(basepart, parts...)
    explain = "Possible directory traversal attack detected."
    for part in parts
        occursin("..", part) && throw(DomainError(part, "contains \"..\". $explain"))
        startswith(part, '/') && throw(DomainError(part, "begins with \"/\". $explain"))
        # The '/' test above only covers Unix-style absolute paths. Ask the platform
        # as well, so that a part which would make `joinpath` restart from a
        # different root or drive is rejected too. On Unix this adds nothing new.
        if isabspath(part) || !isempty(first(splitdrive(part)))
            throw(DomainError(part, "is an absolute path. $explain"))
        end
    end
    return joinpath(basepart, parts...)
end

"""
    try_get_filename_from_headers(headers)

Return the filename given by the `filename` parameter of the `Content-Disposition`
header in `headers`, or `nothing` if the header is absent, has no `filename`
parameter, or that parameter is empty.

This is a rough version of what is needed in
https://github.com/JuliaWeb/HTTP.jl/issues/179: only the plain `filename` parameter
is understood, not the extended `filename*` form.
"""
function try_get_filename_from_headers(headers)
    content_disp = getkv(headers, "Content-Disposition")
    content_disp === nothing && return nothing

    # Parameter names are case-insensitive. The value is either a quoted-string
    # (capture 1, may contain backslash-escaped characters) or a bare value that
    # runs up to the next `;` (capture 2), so trailing parameters are not swallowed.
    m = match(r"filename\s*=\s*(?:\"((?:[^\"\\]|\\.)*)\"|([^;]*))"i, content_disp)
    m === nothing && return nothing

    filename = if m[1] !== nothing
        # Inside a quoted-string a backslash makes the following character literal.
        # This is not the same as Julia string escapes (`\n` must stay an `n`).
        replace(m[1], r"\\(.)" => s"\1")
    else
        strip(m[2])
    end

    # An empty name is useless to the caller; report it as "no filename".
    return isempty(filename) ? nothing : String(filename)
end

"""
    try_get_filename_from_remote_path(target)

Guess a filename from the request `target` (a URL path, optionally followed by a
`?query`). Return the last non-empty path segment as a `String`, or `nothing` if
there is none (for example for `""` or `"/"`).
"""
function try_get_filename_from_remote_path(target)
    # The query string (and any fragment) is not part of the remote file's name.
    path = first(split(target, r"[?#]"))
    # Drop trailing slashes so that "/a/b/" yields "b". Doing this up front, rather
    # than recursing on `dirname`, also terminates for "/" (whose dirname is "/").
    filename = basename(rstrip(path, '/'))
    return isempty(filename) ? nothing : String(filename)
end


# No destination was given: save into the temporary directory.
# Going through the directory method (rather than using a bare temporary name) lets us
# keep the remote filename if possible, because the extension is useful for FileIO.jl.
function determine_file(::Nothing, resp)
    return determine_file(tempdir(), resp)
end

# Work out the local file that the response `resp` should be saved to.
# - If `path` is an existing directory, a filename is chosen and joined onto it.
# - Otherwise `path` is taken to be the file to write.
# In both cases ".gz" is appended when the response has `Content-Encoding: gzip`.
function determine_file(path, resp)
    destination = path
    if isdir(path)
        # We were handed a directory, so we have to work out the filename ourselves:
        # first choice is the response headers, then the remote path, and finally a
        # fallback that is basically a random string.
        chosen = something(
            try_get_filename_from_headers(resp.headers),
            try_get_filename_from_remote_path(resp.request.target),
            basename(tempname()),
        )
        destination = safer_joinpath(path, chosen)
    end

    # Add the extension if we are going to save it in encoded form.
    is_gzipped = header(resp, "Content-Encoding") == "gzip"
    return is_gzipped ? destination * ".gz" : destination
end

"""
    download(url, [local_path], [headers]; update_period=1, kw...)

Similar to `Base.download` this downloads a file, returning the filename.
If the `local_path`:
 - is not provided, then it is saved in a temporary directory
 - if a path to a directory is provided then it is saved into that directory
 - otherwise the local path is used as the filename to save to.

When saving into a directory, the filename is determined (where possible),
from the rules of the HTTP.

 - `update_period` controls how often (in seconds) to report the progress.
    - set to `Inf` to disable reporting
 - `headers` specifies headers to be used for the HTTP GET request
 - any additional keyword args (`kw...`) are passed on to the HTTP request.
"""
function download(url::AbstractString, local_path=nothing, headers=Header[]; update_period=1, kw...)
    # NOTE(review, @oxinabox, Sep 17, 2018): possibly progress should not be formatted.
    # It is unclear what Juno does with progress. OhMyLog doesn't mind whether it is
    # given a string or a float, but something else doing progress bars might like
    # it to be a float.
    format_progress(x) = "$(round(100x, digits=2))%"
    # `Base.format_bytes` expects an integer byte count, but the values passed here
    # are floats: speeds are fractional, and the total/remaining sizes are `NaN`
    # when there is no Content-Length header. Round finite values, and never hand
    # it a non-finite one.
    format_bytes(x) = isfinite(x) ? Base.format_bytes(round(Int, x)) : "∞ B"
    format_seconds(x) = "$(round(x; digits=2)) s"
    format_bytes_per_second(x) = format_bytes(x) * "/s"

    # A non-finite `update_period` (e.g. `Inf`) disables reporting altogether; it
    # cannot be converted to a `Millisecond`. Fractional milliseconds are rounded.
    reporting = isfinite(update_period)
    report_interval = reporting ? Millisecond(round(Int, 1000 * update_period)) : nothing

    @debug 1 "downloading $url"
    local file
    HTTP.open("GET", url, headers; kw...) do stream
        resp = startread(stream)
        file = determine_file(local_path, resp)
        # NaN stands for "unknown" when the Content-Length header is missing.
        total_bytes = parse(Float64, getkv(resp.headers, "Content-Length", "NaN"))
        downloaded_bytes = 0
        start_time = now()
        prev_time = now()

        # Log one progress record and restart the reporting timer.
        function report_callback()
            prev_time = now()
            taken_time = (prev_time - start_time).value / 1000 # in seconds
            average_speed = downloaded_bytes / taken_time
            remaining_bytes = total_bytes - downloaded_bytes
            remaining_time = remaining_bytes / average_speed
            completion_progress = downloaded_bytes / total_bytes

            @info("Downloading",
                  source=url,
                  dest = file,
                  progress = completion_progress |> format_progress,
                  time_taken = taken_time |> format_seconds,
                  time_remaining = remaining_time |> format_seconds,
                  average_speed = average_speed |> format_bytes_per_second,
                  downloaded = downloaded_bytes |> format_bytes,
                  remaining = remaining_bytes |> format_bytes,
                  total = total_bytes |> format_bytes,
                 )
        end

        Base.open(file, "w") do fh
            while !eof(stream)
                downloaded_bytes += write(fh, readavailable(stream))
                if reporting && now() - prev_time > report_interval
                    report_callback()
                end
            end
        end
        # Final report once the body has been written, unless reporting is disabled.
        reporting && report_callback()
    end
    return file
end

42 changes: 42 additions & 0 deletions test/download.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
using HTTP

@testset "HTTP.download" begin
    @testset "Content-Disposition" begin
        # Invalid Content-Disposition: expect just the last part of the URL as the name.
        invalid_header_path = HTTP.download(
            "http://test.greenbytes.de/tech/tc2231/attonlyquoted.asis")
        @test isfile(invalid_header_path)
        @test basename(invalid_header_path) == "attonlyquoted.asis"

        # Here the saved name is expected to be "foo.pdf" rather than the URL's last part.
        header_named_path = HTTP.download(
            "http://test.greenbytes.de/tech/tc2231/inlwithasciifilenamepdf.asis")
        @test isfile(header_named_path)
        @test basename(header_named_path) == "foo.pdf"

        # Don't try this on windows, quotes are not allowed in windows filenames.
        if Sys.isunix()
            escaped_quote_path = HTTP.download(
                "http://test.greenbytes.de/tech/tc2231/attwithasciifnescapedquote.asis")
            @test isfile(escaped_quote_path)
            @test basename(escaped_quote_path) == "\"quoting\" tested.html"
        end
    end

    @testset "Provided Filename" begin
        # An explicit file path must be used as-is and handed back unchanged.
        requested_path = tempname()
        saved_path = HTTP.download(
            "http://test.greenbytes.de/tech/tc2231/inlwithasciifilenamepdf.asis",
            requested_path
        )
        @test requested_path == saved_path
        @test isfile(requested_path)
    end

    @testset "Content-Encoding" begin
        # A gzip-encoded response is saved with a ".gz" extension.
        gzipped_path = HTTP.download("https://httpbin.org/gzip")
        @test isfile(gzipped_path)
        @test last(splitext(gzipped_path)) == ".gz"
    end
end
6 changes: 4 additions & 2 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,11 @@ using HTTP
println("running loopback.jl tests..."); include("loopback.jl");
# println("running WebSockets.jl tests..."); include("WebSockets.jl");
println("running messages.jl tests..."); include("messages.jl");

println("running download.jl tests..."); include("download.jl");

println("running handlers.jl tests..."); include("handlers.jl")
println("running server.jl tests..."); include("server.jl")
println("running handlers.jl tests..."); include("handlers.jl");
println("running server.jl tests..."); include("server.jl");

println("running async.jl tests..."); include("async.jl");
end;