Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fallback to archive.org URLs for failed downloads of FOSS packages #2284

Merged
merged 1 commit into from
Feb 16, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions Core/Net/AutoUpdate.cs
Original file line number Diff line number Diff line change
Expand Up @@ -152,8 +152,8 @@ public void StartUpdateProcess(bool launchCKANAfterUpdate, IUser user = null)
Net.DownloadWithProgress(
new[]
{
new Net.DownloadTarget(fetchedUpdaterUrl.Item1, updaterFilename, fetchedUpdaterUrl.Item2),
new Net.DownloadTarget(fetchedCkanUrl.Item1, ckanFilename, fetchedCkanUrl.Item2),
new Net.DownloadTarget(fetchedUpdaterUrl.Item1, null, updaterFilename, fetchedUpdaterUrl.Item2),
new Net.DownloadTarget(fetchedCkanUrl.Item1, null, ckanFilename, fetchedCkanUrl.Item2),
},
user
);
Expand Down
22 changes: 12 additions & 10 deletions Core/Net/Net.cs
Original file line number Diff line number Diff line change
Expand Up @@ -121,19 +121,21 @@ public static string Download(string url, string filename = null, IUser user = n

/// <summary>
/// Describes a single file to download: the primary URL, an optional
/// fallback URL, the local file name to save to, and the size and
/// MIME type supplied by the caller.
/// </summary>
public class DownloadTarget
{
public Uri url { get; private set; }
public string filename { get; private set; }
public long size { get; private set; }
public string mimeType { get; private set; }
public Uri url { get; private set; }
public Uri fallbackUrl { get; private set; }
public string filename { get; private set; }
public long size { get; private set; }
public string mimeType { get; private set; }

/// <summary>
/// Stores the given values. When <paramref name="filename"/> is null or
/// empty, a name from FileTransaction.GetTempFileName() is used instead.
/// </summary>
public DownloadTarget(Uri url, string filename = null, long size = 0, string mimeType = "")
public DownloadTarget(Uri url, Uri fallback = null, string filename = null, long size = 0, string mimeType = "")
{
this.url = url;
this.filename = string.IsNullOrEmpty(filename)
this.url = url;
this.fallbackUrl = fallback;
// No file name supplied: fall back to a temporary file name
this.filename = string.IsNullOrEmpty(filename)
? FileTransaction.GetTempFileName()
: filename;
this.size = size;
this.mimeType = mimeType;
this.size = size;
this.mimeType = mimeType;
}
}

Expand All @@ -144,7 +146,7 @@ public static string DownloadWithProgress(string url, string filename = null, IU

/// <summary>
/// Downloads a single URL by wrapping it in a one-element array of
/// DownloadTarget and passing that array, together with
/// <paramref name="user"/>, to the array-based DownloadWithProgress call.
/// </summary>
/// <returns>
/// The file name stored on the target; when <paramref name="filename"/> is
/// null or empty, DownloadTarget substitutes a temporary file name.
/// </returns>
public static string DownloadWithProgress(Uri url, string filename = null, IUser user = null)
{
var targets = new[] {new DownloadTarget(url, filename)};
// The second argument is the fallback URL; none is supplied here
var targets = new[] {new DownloadTarget(url, null, filename)};
DownloadWithProgress(targets, user);
return targets.First().filename;
}
Expand Down
25 changes: 21 additions & 4 deletions Core/Net/NetAsyncDownloader.cs
Original file line number Diff line number Diff line change
Expand Up @@ -22,18 +22,22 @@ public class NetAsyncDownloader
private class NetAsyncDownloaderDownloadPart
{
public Uri url;
public Uri fallbackUrl;
public WebClient agent = new WebClient();
public DateTime lastProgressUpdateTime;
public string path;
public long bytesLeft;
public long size;
public int bytesPerSecond;
public bool triedFallback;
public Exception error;
public int lastProgressUpdateSize;

public NetAsyncDownloaderDownloadPart(Net.DownloadTarget target, string path = null)
{
this.url = target.url;
this.fallbackUrl = target.fallbackUrl;
this.triedFallback = false;
this.path = path ?? Path.GetTempFileName();
size = bytesLeft = target.size;
lastProgressUpdateTime = DateTime.Now;
Expand Down Expand Up @@ -426,16 +430,29 @@ private void FileDownloadComplete(int index, Exception error)
if (error != null)
{
log.InfoFormat("Error downloading {0}: {1}", downloads[index].url, error);

// Check whether we were already downloading the fallback url
if (!downloads[index].triedFallback && downloads[index].fallbackUrl != null)
{
log.InfoFormat("Trying fallback URL: {0}", downloads[index].fallbackUrl);
// Try the fallbackUrl
downloads[index].triedFallback = true;
downloads[index].agent.DownloadFileAsync(downloads[index].fallbackUrl, downloads[index].path);
// Short circuit the completion process so the fallback can run
return;
}
else
{
// If there was an error, remember it, but we won't raise it until
// all downloads are finished or cancelled.
downloads[index].error = error;
}
}
else
{
log.InfoFormat("Finished downloading {0}", downloads[index].url);
}

// If there was an error, remember it, but we won't raise it until
// all downloads are finished or cancelled.
downloads[index].error = error;

if (++completed_downloads == downloads.Count)
{
FinalizeDownloads();
Expand Down
1 change: 1 addition & 0 deletions Core/Net/NetAsyncModulesDownloader.cs
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ public void DownloadModules(NetModuleCache cache, IEnumerable<CkanModule> module
downloader.DownloadAndWait(
unique_downloads.Select(item => new Net.DownloadTarget(
item.Key,
item.Value.InternetArchiveDownload,
// Use a temp file name
null,
item.Value.download_size,
Expand Down
16 changes: 16 additions & 0 deletions Core/Types/CkanModule.cs
Original file line number Diff line number Diff line change
Expand Up @@ -623,6 +623,22 @@ public string DescribeInstallStanzas()
return string.Join(", ", descriptions);
}

/// <summary>
/// Return an archive.org URL for this download, or null if it's not there.
/// The filenames look a lot like the filenames in Net.Cache, but don't be fooled!
/// Here it's the first 8 characters of the SHA1 of the DOWNLOADED FILE, not the URL!
/// </summary>
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, the hash of that file will remain consistent. The url may not.

Produces a filename based on the first 8 digits of the sha1 hash,
the 'identifier' and the 'version' in the metadata if the
download_hash exists. Returns '0' if there is no download hash
or if the content type is anything other than zip/gz/tar/tar.gz.

There are some tests that ensure the correct filenames are generated.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yup, either way makes enough sense to me. I just wanted to note it explicitly since we have two different 8-digit hexadecimal filename prefixes floating around, and it's not easy to tell that they're (supposed to be) different at a glance.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, that actually makes sense for the URL in the cache. If the URL changes, you probably do want to re-download.

public Uri InternetArchiveDownload
{
    get
    {
        // The archive file name embeds the first 8 characters of the SHA1 of the
        // downloaded file, so without a usable hash there is no URL to return.
        // (Previously a missing download_hash/sha1 threw a NullReferenceException
        // and a short hash made Substring throw.)
        string sha1 = download_hash?.sha1;
        if (sha1 == null || sha1.Length < 8)
        {
            return null;
        }

        // Only offer the fallback when every license of the module is redistributable.
        // A missing license list is treated as not redistributable.
        if (license == null || !license.All(l => l.Redistributable))
        {
            return null;
        }

        return new Uri(
            $"https://archive.org/download/{identifier}-{version}/{sha1.Substring(0, 8)}-{identifier}-{version}.zip");
    }
}

/// <summary>
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Some tests of the filename generation could be useful, but I wouldn't treat this as a blocker.

/// Format a byte count into readable file size
/// </summary>
Expand Down
52 changes: 50 additions & 2 deletions Core/Types/License.cs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,8 @@ public class License
public static License UnknownLicense => _unknownLicense ?? (_unknownLicense = new License("unknown"));

// TODO: It would be lovely for our build system to write these for us.
private static readonly HashSet<string> valid_licenses = new HashSet<string> {
private static readonly HashSet<string> valid_licenses = new HashSet<string>()
{
"public-domain",
"AFL-3.0",
"AGPL-3.0",
Expand Down Expand Up @@ -49,6 +50,39 @@ public class License
"open-source", "restricted", "unrestricted", "unknown"
};

// License identifiers for which License.Redistributable returns true.
// Lookup uses the default string comparer, so matching is case-sensitive.
// NOTE(review): presumably every entry should also appear in valid_licenses — confirm.
private static readonly HashSet<string> redistributable_licenses = new HashSet<string>()
{
"public-domain",
"Apache", "Apache-1.0", "Apache-2.0",
"Artistic", "Artistic-1.0", "Artistic-2.0",
"BSD-2-clause", "BSD-3-clause", "BSD-4-clause",
"ISC",
"CC-BY", "CC-BY-1.0", "CC-BY-2.0", "CC-BY-2.5", "CC-BY-3.0", "CC-BY-4.0",
"CC-BY-SA", "CC-BY-SA-1.0", "CC-BY-SA-2.0", "CC-BY-SA-2.5", "CC-BY-SA-3.0", "CC-BY-SA-4.0",
"CC-BY-NC", "CC-BY-NC-1.0", "CC-BY-NC-2.0", "CC-BY-NC-2.5", "CC-BY-NC-3.0", "CC-BY-NC-4.0",
"CC-BY-NC-SA", "CC-BY-NC-SA-1.0", "CC-BY-NC-SA-2.0", "CC-BY-NC-SA-2.5", "CC-BY-NC-SA-3.0", "CC-BY-NC-SA-4.0",
"CC-BY-NC-ND", "CC-BY-NC-ND-1.0", "CC-BY-NC-ND-2.0", "CC-BY-NC-ND-2.5", "CC-BY-NC-ND-3.0", "CC-BY-NC-ND-4.0",
"CC0",
"CDDL", "CPL",
"EFL-1.0", "EFL-2.0",
"Expat", "MIT",
"GPL-1.0", "GPL-2.0", "GPL-3.0",
"LGPL-2.0", "LGPL-2.1", "LGPL-3.0",
"GFDL-1.0", "GFDL-1.1", "GFDL-1.2", "GFDL-1.3",
"GFDL-NIV-1.0", "GFDL-NIV-1.1", "GFDL-NIV-1.2", "GFDL-NIV-1.3",
"LPPL-1.0", "LPPL-1.1", "LPPL-1.2", "LPPL-1.3c",
"MPL-1.1",
"Perl",
"Python-2.0",
"QPL-1.0",
"W3C",
"Zlib",
"Zope",
"WTFPL",
"Unlicense",
"open-source", "unrestricted"
};

private string license;

/// <summary>
Expand All @@ -69,6 +103,21 @@ public License(string license)
this.license = license;
}

/// <summary>
/// Whether this license is in the set of redistributable licenses, i.e. it
/// permits CKAN and others to redistribute the module.
/// We automatically upload such mods to https://archive.org/details/kspckanmods
/// </summary>
/// <value>
/// True if redistributable, false otherwise.
/// </value>
public bool Redistributable => redistributable_licenses.Contains(license);

/// <summary>
/// Returns the license as a string.
/// </summary>
Expand All @@ -78,4 +127,3 @@ public override string ToString()
}
}
}