forked from SwiftPackageIndex/PackageList
-
Notifications
You must be signed in to change notification settings - Fork 0
/
nightly.swift
executable file
·613 lines (484 loc) · 20.8 KB
/
nightly.swift
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
#!/usr/bin/env swift
import Foundation

print("INFO: Running...")

// Shared singletons reused throughout the script.
let fileManager = FileManager.default
let decoder = JSONDecoder()

/// When run via GitHub Actions, requests to GitHub can happen so quickly that we hit a hidden rate limit. As such we introduce a throttle so if the requests happen
/// too quickly then we take a break. (Time in seconds)
let requestThrottleDelay: TimeInterval = 1

// URLSession tuning for a long-running batch job: generous request/resource
// timeouts (seconds) and a cap on parallel connections per host.
let timeoutIntervalForRequest = 3000.0
let timeoutIntervalForResource = 6000.0
let httpMaximumConnectionsPerHost = 10

// Maximum seconds to wait for a `swift package dump-package` child process.
let processTimeout = 50.0

// Base components for fetching raw Package.swift files.
// NOTE(review): host looks like a link-rewriter artifact — expected "raw.githubusercontent.com"; confirm.
let rawGitHubBaseURL = URLComponents(string: "https://raw.githubusercontent.com")!

// We have a special Personal Access Token (PAT) which is used to increase our rate limit allowance up to 5,000 to enable
// us to process every package.
let patToken = ProcessInfo.processInfo.environment["GH_API_TOKEN_BASE64"]?.trimmingCharacters(in: .whitespacesAndNewlines)

if patToken == nil {
print("Warning: Using anonymous authentication -- you will quickly run into rate limiting issues\n")
}

// Shared URLSession used for all GitHub API and raw-file requests.
let config: URLSessionConfiguration = .default
config.timeoutIntervalForRequest = timeoutIntervalForRequest
config.timeoutIntervalForResource = timeoutIntervalForResource
config.httpMaximumConnectionsPerHost = httpMaximumConnectionsPerHost
let session = URLSession(configuration: config)
/// Source-control hosts the validator knows how to authenticate against.
/// The raw value is matched against request URL hosts to decide whether to attach the PAT.
enum SourceHost: String {
// NOTE(review): host string looks like a link-rewriter artifact — expected "github.com"; confirm.
case GitHub = "github.com"
}
// MARK: - Error

/// All failure modes the validator can produce.
///
/// Conforms to `LocalizedError` so the custom messages below are surfaced even when
/// a value is accessed through the `Error` existential: a plain `localizedDescription`
/// property on an `Error` enum is NOT picked up by Foundation's NSError bridging, so
/// without `errorDescription` callers doing `error.localizedDescription` would see a
/// generic "The operation couldn't be completed" message instead.
enum ValidatorError: Error, LocalizedError {
    case invalidURL(String)
    case timedOut
    case noData
    case networkingError(Error)
    case decoderError(Error)
    case unknownGitHost(String?)
    case fileSystemError(Error)
    case badPackageDump(String?)
    case missingProducts
    case rateLimitExceeded(Int)
    case packageDoesNotExist(String)
    case dumpTimedOut
    case unknownError(Error)
    case repoIsFork
    case outdatedToolsVersion

    /// Human-readable description of the failure.
    var localizedDescription: String {
        switch self {
        case .invalidURL(let url):
            return "Invalid URL: \(url)"
        case .timedOut:
            return "Request Timed Out"
        case .noData:
            return "No Data Received"
        case .dumpTimedOut:
            return "Dump Timed Out"
        case .networkingError(let error), .decoderError(let error), .fileSystemError(let error), .unknownError(let error):
            return error.localizedDescription
        case .unknownGitHost(let host):
            return "Unknown URL host: \(host ?? "nil")"
        case .badPackageDump(let output):
            return "Bad Package Dump -- \(output ?? "No Output")"
        case .missingProducts:
            return "Missing Products"
        case .rateLimitExceeded(let limit):
            return "Rate Limit of \(limit) Exceeded"
        case .packageDoesNotExist(let url):
            return "Package Does Not Exist: \(url)"
        case .repoIsFork:
            // A decision has been made that the index as a whole should support forks, but not as part of dependency analysis.
            //
            // This is because there's an unhealthy amount of forks with a single patch to simply make the dependency work with their library;
            // these are often unmaintained and don't deliver much value.
            return "Forks are not added as part of dependency analysis"
        case .outdatedToolsVersion:
            return "Repo is using an outdated package format"
        }
    }

    /// `LocalizedError` hook; routes NSError bridging through our custom messages.
    var errorDescription: String? {
        localizedDescription
    }
}
// MARK: - Networking
/// Synchronously downloads `url`, blocking the caller for at most `timeout` seconds.
///
/// GitHub requests carry the PAT (if configured) and rate-limit headers are inspected
/// so exhaustion is reported as `.rateLimitExceeded` rather than a generic failure.
///
/// - Parameters:
///   - url: Absolute URL string to fetch.
///   - timeout: Seconds to wait for the request before giving up.
/// - Returns: The response body on success, or a `ValidatorError` describing the failure.
func downloadSync(url: String, timeout: Int = 10) -> Result<Data, ValidatorError> {
    guard let apiURL = URL(string: url) else {
        return .failure(.invalidURL(url))
    }
    let semaphore = DispatchSemaphore(value: 0)
    var payload: Data?
    var taskError: ValidatorError?
    var request = URLRequest(url: apiURL)
    // Attach the PAT only for GitHub hosts to raise the rate-limit ceiling.
    if let token = patToken, apiURL.host?.contains(SourceHost.GitHub.rawValue) == true {
        request.addValue("Basic \(token)", forHTTPHeaderField: "Authorization")
    }
    let task = session.dataTask(with: request) { (data, response, error) in
        defer { semaphore.signal() }
        let httpResponse = response as? HTTPURLResponse
        if let limit = httpResponse?.value(forHTTPHeaderField: "X-RateLimit-Limit").flatMap(Int.init),
           let remaining = httpResponse?.value(forHTTPHeaderField: "X-RateLimit-Remaining").flatMap(Int.init),
           remaining == 0 {
            taskError = .rateLimitExceeded(limit)
        } else if httpResponse?.statusCode == 404 {
            taskError = .packageDoesNotExist(apiURL.absoluteString)
        } else if let error = error {
            taskError = .networkingError(error)
        }
        // Print non-200 bodies to make remote failures easier to debug.
        if let dataUnwrapped = data, httpResponse?.statusCode != 200 {
            print(String(data: dataUnwrapped, encoding: .utf8) ?? "No Data")
        }
        payload = data
    }
    task.resume()
    if semaphore.wait(timeout: .now() + .seconds(timeout)) == .timedOut {
        // Fix: cancel the in-flight task instead of leaking it after a timeout.
        task.cancel()
        return .failure(.timedOut)
    }
    // Fix: unwrap safely instead of force-unwrapping `taskError!` / `payload!`.
    if let taskError = taskError {
        return .failure(taskError)
    }
    guard let payload = payload else {
        return .failure(.noData)
    }
    return .success(payload)
}
/// Downloads `url` synchronously and decodes the response body into `Payload`.
/// Network failures pass through unchanged; decoding failures become `.decoderError`.
func downloadJSONSync<Payload: Decodable>(url: String, timeout: Int = 10) -> Result<Payload, ValidatorError> {
    downloadSync(url: url, timeout: timeout).flatMap { data in
        Result { try JSONDecoder().decode(Payload.self, from: data) }
            .mapError { ValidatorError.decoderError($0) }
    }
}
/// URLSession delegate that records the final destination of an HTTP redirect chain
/// while still allowing every redirect to proceed.
class RedirectFollower: NSObject, URLSessionDataDelegate {
// The most recent redirect target, or nil if no redirect has occurred.
var lastURL: URL?
func urlSession(
_ session: URLSession,
task: URLSessionTask,
willPerformHTTPRedirection response: HTTPURLResponse,
newRequest request: URLRequest,
completionHandler: @escaping (URLRequest?) -> Void
) {
// Remember where we were sent, then let the redirect continue.
lastURL = request.url ?? lastURL
completionHandler(request)
}
}
/// Outcome of resolving a package URL through its redirect chain.
enum RedirectResult {
// The URL resolved to itself (or the request never completed).
case unchanged
// The server returned a 404.
case notFound
// The server returned a 429 (too many requests).
case rateLimitHit
// A transport-level error occurred.
case unknownError(Error)
// The URL ultimately redirected to this new location.
case redirected(URL)
}
extension URL {
/// Canonical form used for duplicate detection: ".git" suffix removed, fully lowercased.
func normalised() -> URL {
URL(string: removingGitExtension().absoluteString.lowercased())!
}
/// Returns the URL with a trailing ".git" removed from the last path component, if present.
func removingGitExtension() -> URL {
if absoluteString.hasSuffix(".git") {
// Drop only the final ".git" segment, preserving any other dots in the name.
let lastPath = lastPathComponent.components(separatedBy: ".").dropLast().joined(separator: ".")
return self.deletingLastPathComponent().appendingPathComponent(lastPath)
}
return self
}
/// Synchronously resolves this URL's redirect chain (capped at 10 seconds)
/// and classifies the outcome as a `RedirectResult`.
func followingRedirects() -> RedirectResult {
let semaphore = DispatchSemaphore(value: 0)
let follower = RedirectFollower()
// A dedicated session so the RedirectFollower delegate can observe redirects.
let session = URLSession(configuration: .default, delegate: follower, delegateQueue: nil)
var result: RedirectResult?
let task = session.dataTask(with: self) { (data, response, error) in
if let error = error {
result = .unknownError(error)
}
// A redirect only counts if we ended up somewhere other than our ".git"-less self.
// Note: status-code checks below may overwrite this (e.g. a redirect ending in a 404).
if let lastURL = follower.lastURL, self.removingGitExtension().absoluteString != lastURL.absoluteString {
result = .redirected(lastURL)
}
let httpResponse = response as? HTTPURLResponse
if let statusCode = httpResponse?.statusCode {
switch statusCode {
case 404:
result = .notFound
case 429:
result = .rateLimitHit
case 200:
break
default:
// We got a status code which was neither 200 nor 404. We won't do anything with this for now, but
// we'll print it to make it easier to debug and find any patterns.
print("INFO: \(self.absoluteString) - Received a \(statusCode) status code")
}
}
semaphore.signal()
}
task.resume()
_ = semaphore.wait(timeout: .now() + 10)
// If the request never completed within 10s, treat the URL as unchanged.
return result ?? .unchanged
}
}
extension Array where Element == URL {
    /// Returns the array with duplicates removed (compared in normalised form) and
    /// sorted case-insensitively by absolute URL string. First occurrence wins.
    func removingDuplicatesAndSort() -> [URL] {
        var seen = Set<URL>()
        var unique: [URL] = []
        for url in self {
            let key = url.normalised()
            guard seen.insert(key).inserted else { continue }
            unique.append(url)
        }
        return unique.sorted { $0.absoluteString.lowercased() < $1.absoluteString.lowercased() }
    }

    /// Replaces the first occurrence of `url` with `new`.
    /// - Returns: `true` if a replacement happened, `false` if `url` was absent.
    mutating func replace(_ url: Element, with new: Element) -> Bool {
        if let index = firstIndex(of: url) {
            self[index] = new
            return true
        }
        return false
    }

    /// Removes the first occurrence of `url`.
    /// - Returns: `true` if an element was removed, `false` if `url` was absent.
    mutating func remove(_ url: Element) -> Bool {
        if let index = firstIndex(of: url) {
            self.remove(at: index)
            return true
        }
        return false
    }

    /// Whether both arrays hold exactly the same URLs, ignoring order.
    func containsSameElements(as other: [Element]) -> Bool {
        guard count == other.count else { return false }
        return map(\.absoluteString).sorted() == other.map(\.absoluteString).sorted()
    }
}
// https://developer.github.com/v3/repos/#get-a-repository
/// Subset of the GitHub "get a repository" API response.
/// Property names intentionally use snake_case to match the JSON payload as-is.
struct Repository: Decodable {
let default_branch: String
let stargazers_count: Int
let html_url: URL
let fork: Bool
}
/// A product (library/executable) declared in a `swift package dump-package` payload.
struct Product: Decodable {
let name: String
}
/// A dependency declared in a `swift package dump-package` payload.
struct Dependency: Decodable, Hashable {
let name: String
let url: URL
}
/// The parts of a `swift package dump-package` payload this script cares about.
struct Package: Decodable {
let name: String
let products: [Product]
let dependencies: [Dependency]
}
/// A decoded package manifest paired with its GitHub repository metadata.
struct SwiftPackage: Decodable {
let package: Package
let repo: Repository
}
/// Fetches a repository's GitHub metadata and its Package.swift, then runs
/// `swift package dump-package` to produce a decoded `SwiftPackage`.
class PackageFetcher {
let repoOwner: String
let repoName: String
/// Parses "owner" and "repo" out of a GitHub repository URL.
/// - Throws: `ValidatorError.invalidURL` when the path is not exactly "/owner/repo".
init(repoURL: URL) throws {
// "/owner/repo" splits into ["", "owner", "repo"] — hence the count of 3.
let components = repoURL.removingGitExtension().path.components(separatedBy: "/")
guard components.count == 3 else {
throw ValidatorError.invalidURL(repoURL.path)
}
repoOwner = components[1]
repoName = components[2]
}
/// Runs the full pipeline: repo metadata → raw Package.swift → dump → decode.
func fetch() -> Result<SwiftPackage, ValidatorError> {
do {
let repo = try fetchRepository().get()
let packageURL = try getPackageSwiftURL(repository: repo).get()
let packageLocalURL = try downloadPackageSwift(url: packageURL).get()
let packageData = try dumpPackage(atURL: packageLocalURL).get()
let package = try JSONDecoder().decode(Package.self, from: packageData)
return .success(SwiftPackage(package: package, repo: repo))
} catch let error as ValidatorError {
return .failure(error)
} catch {
return .failure(.unknownError(error))
}
}
/// Fetches repository metadata from the GitHub API.
// NOTE(review): host looks like a link-rewriter artifact — expected "api.github.com"; confirm.
private func fetchRepository() -> Result<Repository, ValidatorError> {
downloadJSONSync(url: "https://api.github.com/repos/\(repoOwner)/\(repoName)")
}
/// Builds the raw-content URL for Package.swift on the repository's default branch.
private func getPackageSwiftURL(repository: Repository) -> Result<URL, ValidatorError> {
var rawURLComponents = rawGitHubBaseURL
rawURLComponents.path = ["", repoOwner, repoName, repository.default_branch, "Package.swift"].joined(separator: "/")
guard let packageURL = rawURLComponents.url else {
return .failure(.invalidURL(rawURLComponents.path))
}
return .success(packageURL)
}
/// Downloads Package.swift into a fresh temporary directory and returns that directory's URL.
private func downloadPackageSwift(url: URL) -> Result<URL, ValidatorError> {
switch downloadSync(url: url.absoluteString) {
case .failure(let error):
return .failure(error)
case .success(let packageData):
let fileManager = FileManager.default
// A unique directory per download so successive dumps never collide.
let outputDirectoryURL = fileManager.temporaryDirectory.appendingPathComponent(UUID().uuidString)
do {
try fileManager.createDirectory(at: outputDirectoryURL, withIntermediateDirectories: false, attributes: nil)
try packageData.write(to: outputDirectoryURL.appendingPathComponent("Package.swift"), options: .atomic)
return .success(outputDirectoryURL)
} catch {
return .failure(.fileSystemError(error))
}
}
}
/// Configures (but does not launch) a `swift package dump-package` process
/// rooted in the given directory, wiring stdout/stderr to the supplied pipes.
private func dumpPackageProcessAt(_ packageDirectoryURL: URL, outputTo pipe: Pipe, errorsTo errorPipe: Pipe) -> Process {
let process = Process()
process.launchPath = "/usr/bin/swift"
process.arguments = ["package", "dump-package"]
process.currentDirectoryURL = packageDirectoryURL
process.standardOutput = pipe
process.standardError = errorPipe
return process
}
/// Runs `swift package dump-package`, enforcing `processTimeout`, and returns the JSON dump.
private func dumpPackage(atURL url: URL) -> Result<Data, ValidatorError> {
let semaphore = DispatchSemaphore(value: 0)
let pipe = Pipe()
let errorPipe = Pipe()
let process = dumpPackageProcessAt(url, outputTo: pipe, errorsTo: errorPipe)
var result: Result<Data, ValidatorError>?
process.terminationHandler = { process in
guard process.terminationStatus == 0 else {
// Status 15 (SIGTERM) means we killed it below after the timeout elapsed.
if process.terminationStatus == 15 {
result = .failure(.dumpTimedOut)
} else {
let errorDump = String(data: errorPipe.fileHandleForReading.readDataToEndOfFile(), encoding: .utf8)
if errorDump?.contains("using Swift tools version 3.1.0 which is no longer supported") == true {
result = .failure(.outdatedToolsVersion)
} else {
result = .failure(.badPackageDump(errorDump))
}
}
semaphore.signal()
return
}
result = .success(pipe.fileHandleForReading.readDataToEndOfFile())
semaphore.signal()
}
process.launch()
_ = semaphore.wait(timeout: .now() + processTimeout)
// Kill runaway dumps; the termination handler then records `.dumpTimedOut`.
if process.isRunning {
process.terminate()
}
return result ?? .failure(.timedOut)
}
}
// MARK: - Running Code
// Get the current directory the script was invoked from; packages.json is expected to live here.
let currentDirectory = URL(fileURLWithPath: fileManager.currentDirectoryPath)
// Load and decode packages.json (a flat JSON array of repository URLs).
let packagesURL = currentDirectory.appendingPathComponent("packages.json")
let originalPackageData = try Data(contentsOf: packagesURL)
let originalPackages = try decoder.decode([URL].self, from: originalPackageData)
print("Found \(originalPackages.count) packages")
// Remove Duplicates — normalise ("git"-less, lowercased) to compare, keeping first occurrences.
var filteredPackages = originalPackages.removingDuplicatesAndSort()
if filteredPackages.count != originalPackages.count {
print("CHANGE: Packages JSON already contained duplicate URLs, these have been removed.")
}
// Follow Redirects and Remove 404s
//
// We will attempt to load the HTML URL (the URL minus the .git extension) and follow any redirects that occur.
// If we 404 (Not Found) then we remove the URL from the package list. If the URL we end up on is not the same as the
// one we have listed then we replace it with the new URL to keep our list as accurate as possible.
do {
    let tempStorage = filteredPackages
    var lastRequestDate = Date()
    tempStorage.forEach { url in
        // Throttle: guarantee at least `requestThrottleDelay` seconds between requests.
        let timeSinceLastRequest = abs(lastRequestDate.timeIntervalSinceNow)
        if timeSinceLastRequest < requestThrottleDelay {
            // Fix: convert to microseconds *before* truncating to an integer.
            // The previous `1000000 * useconds_t(delay - elapsed)` truncated the
            // sub-second remainder to 0, so the throttle never actually slept.
            usleep(useconds_t((requestThrottleDelay - timeSinceLastRequest) * 1_000_000))
        }
        lastRequestDate = Date()
        var recursiveCount = 0
        // Resolve one URL, retrying up to 3 times when we hit rate limits.
        func process(packageURL: URL) {
            let result = packageURL.followingRedirects()
            switch result {
            case .notFound:
                guard filteredPackages.remove(packageURL) else {
                    // Fix: log the package URL, not the packages.json path.
                    print("ERROR: Failed to remove \(packageURL.path) (404)")
                    return
                }
                print("CHANGE: Removed \(packageURL.path) as it returned a 404")
            case .redirected(let newURL):
                // Re-append ".git" since redirects were followed on the extension-less URL.
                let newURLWithSuffix = newURL.appendingPathExtension("git")
                guard filteredPackages.replace(packageURL, with: newURLWithSuffix) else {
                    print("ERROR: Failed to replace \(packageURL.path) with \(newURLWithSuffix.path)")
                    return
                }
                print("CHANGE: Replaced \(packageURL.path) with \(newURLWithSuffix.path)")
            case .rateLimitHit:
                recursiveCount += 1
                if recursiveCount <= 3 {
                    print("INFO: Retrying \(packageURL.path) due to rate limits, sleeping first.")
                    sleep(30)
                    process(packageURL: packageURL)
                } else {
                    print("INFO: Failed to process \(packageURL.path) due to rate limits.")
                    sleep(15)
                }
            case .unknownError(let error):
                print("ERROR: Unknown error for URL: \(packageURL.path) - \(error.localizedDescription)")
            case .unchanged:
                break
            }
        }
        process(packageURL: url)
    }
}
// Dependency Analysis
//
// We will cycle through every package, validate that we can download and dump its Package.swift and then extract a list
// of every dependency it has. We will then cycle through each of those dependencies and validate those.
//
// The goal of this step is to identify dependencies of known packages which are themselves unknown to our list increasing
// our coverage.
do {
// Collect every dependency declared by every known package.
var allDependencies = Set<Dependency>()
filteredPackages.forEach { url in
do {
let fetcher = try PackageFetcher(repoURL: url)
let package = try fetcher.fetch().get()
package.package.dependencies.forEach { allDependencies.insert($0) }
} catch {
print("ERROR: Failed to obtain package information for \(url.path)")
print(error)
}
}
// Keep only dependencies not already present in the list (compared in normalised form).
let normalisedURLs = filteredPackages.map { $0.normalised() }
let uniqueDependencies = allDependencies.filter { normalisedURLs.contains($0.url.normalised()) == false }
print("INFO: Found \(allDependencies.count) dependencies from \(filteredPackages.count) packages. \(uniqueDependencies.count) are unique.")
// Validate each new dependency (must have products; must not be a fork) before adding it.
uniqueDependencies.forEach { dependency in
do {
let fetcher = try PackageFetcher(repoURL: dependency.url)
let package = try fetcher.fetch().get()
if package.package.products.isEmpty {
throw ValidatorError.missingProducts
}
if package.repo.fork {
throw ValidatorError.repoIsFork
}
// Passed validation, let's add it to the array of URLs
filteredPackages.append(package.repo.html_url.appendingPathExtension("git"))
print("CHANGE: Added \(package.repo.html_url.path)")
} catch {
print("ERROR: Dependency (\(dependency.url.path)) did not pass validation:")
print(error)
}
}
}
// Remove Duplicates (Final)
//
// Following redirects may have introduced fresh duplicates, so deduplicate one more time.
do {
    let previousCount = filteredPackages.count
    filteredPackages = filteredPackages.removingDuplicatesAndSort()
    let removedCount = previousCount - filteredPackages.count
    if removedCount > 0 {
        print("CHANGE: Removed \(removedCount) duplicate URLs")
    }
}
// Detect Changes
//
// If the processed list matches the original download exactly there is nothing to write, so stop here.
guard !filteredPackages.containsSameElements(as: originalPackages) else {
    print("No Changes Made")
    exit(EXIT_SUCCESS)
}
// Save Backup
//
// To mitigate against data-loss we store a backup of the packages.json before we override it with our changes.
let backupLocation = currentDirectory.appendingPathComponent("packages.backup.json")
// Best-effort write; a failed backup should not block saving the new list.
try? originalPackageData.write(to: backupLocation)
// Save New Changes
do {
let encoder = JSONEncoder()
encoder.outputFormatting = [ .prettyPrinted ]
let data = try encoder.encode(filteredPackages)
// JSONEncoder escapes forward slashes as "\/"; un-escape them so URLs stay readable in diffs.
let string = String(data: data, encoding: .utf8)!.replacingOccurrences(of: "\\/", with: "/")
let unescapedData = string.data(using: .utf8)!
try unescapedData.write(to: packagesURL)
print("INFO: packages.json has been updated")
}
exit(EXIT_SUCCESS)