-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathfile.go
70 lines (57 loc) · 1.79 KB
/
file.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
package warc
import (
"fmt"
"os"
"strconv"
"sync"
"sync/atomic"
"time"
)
// filenameGenerationLock serializes calls to generateWarcFileName so that the
// wrap-around check, the increment and the read of the serial happen as one
// step.
var filenameGenerationLock sync.Mutex

// generateWarcFileName builds the name of a WARC file that is still being
// written. It follows the layout recommended by the WARC specification, with
// an extra ".open" suffix:
//
//	Prefix-Timestamp-Serial-Crawlhost.warc[.gz|.zst].open
//
// Timestamp is the current UTC time formatted as yyyyMMddHHmmss followed by
// three zero-padded millisecond digits. Serial is the value of serial after it
// has been incremented, zero-padded to five digits; once the counter has
// reached 99999 it restarts at 1. Crawlhost is the host name reported by the
// kernel.
//
// compression selects the extension: "GZIP" gives ".warc.gz.open", "ZSTD"
// gives ".warc.zst.open", and any other value gives ".warc.open".
//
// It panics if the host name cannot be read.
func generateWarcFileName(prefix string, compression string, serial *atomic.Uint64) (fileName string) {
	filenameGenerationLock.Lock()
	defer filenameGenerationLock.Unlock()

	// Get host name as reported by the kernel
	hostName, err := os.Hostname()
	if err != nil {
		panic(err)
	}

	// Don't let serial grow past maxSerial, the largest value that fits in the
	// five serial digits. Checking with >= (rather than == only) also recovers
	// a counter that was handed in already above the limit.
	const maxSerial = 99999
	if serial.Load() >= maxSerial {
		serial.Store(0)
	}

	// Increase the serial number; the lock held above keeps the increment and
	// the read inside formatSerial consistent across concurrent callers.
	serial.Add(1)
	formattedSerial := formatSerial(serial, "5")

	now := time.Now().UTC()

	// Milliseconds are in 0..999. Adding 1000 guarantees a four-digit number,
	// and dropping its leading "1" leaves exactly three zero-padded digits.
	millis := now.Nanosecond() / int(time.Millisecond)
	date := now.Format("20060102150405") + strconv.Itoa(1000 + millis)[1:]

	var fileExt string
	switch compression {
	case "GZIP":
		fileExt = ".warc.gz.open"
	case "ZSTD":
		fileExt = ".warc.zst.open"
	default:
		fileExt = ".warc.open"
	}

	return prefix + "-" + date + "-" + formattedSerial + "-" + hostName + fileExt
}
// formatSerial renders the current value of serial as a decimal number,
// left-padded with zeros to the width given by format.
// For example, a serial of 23 with format "5" is rendered as "00023".
func formatSerial(serial *atomic.Uint64, format string) string {
	current := serial.Load()

	// format is spliced into the verb as the minimum field width, e.g. "%05d".
	verb := "%0" + format + "d"

	return fmt.Sprintf(verb, current)
}
// isFileSizeExceeded reports whether the current size of file has reached
// maxSize, where maxSize is expressed in mebibytes (1 MiB = 1024 * 1024
// bytes). It returns true when the size is greater than or equal to maxSize
// and false otherwise.
//
// The size is compared as a fractional number of MiB, so limits such as 1.5
// are honored instead of being checked against a size rounded down to a whole
// MiB.
//
// It panics if the file cannot be stat'ed.
func isFileSizeExceeded(file *os.File, maxSize float64) bool {
	// Get actual file size
	stat, err := file.Stat()
	if err != nil {
		panic(err)
	}

	// Convert to float64 before dividing: integer division would discard the
	// fractional part of the size.
	const bytesPerMiB = 1024 * 1024
	fileSize := float64(stat.Size()) / bytesPerMiB

	// If fileSize has reached maxSize, return true
	return fileSize >= maxSize
}