-
Notifications
You must be signed in to change notification settings - Fork 2
/
index.go
97 lines (82 loc) · 2.66 KB
/
index.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
package main
import (
	"bufio"
	"fmt"
	"io"
	"os"
	"sort"
	"strconv"
	"strings"

	"github.com/facette/natsort"
	"github.com/spf13/cobra"
)
// indexCmd implements the "index" subcommand. It reads a tab-separated input
// file whose first column is a name (chr) and whose second column is an
// integer start coordinate, assigns every record to a fixed-width bin of
// --bin coordinates, and writes "<file>.idx" describing which byte range of
// the input holds each (chr, bin) pair.
//
// Index file layout (tab-separated):
//
//	#BIN  <binSize>  <input file size in bytes>
//	<chr> <binStart> <offset of first line in bin> <offset just past last line in bin>
//
// Lines starting with "#" and blank lines are not indexed. Lines that have
// fewer than two fields or a non-integer start are skipped with a warning on
// stderr. As stated in the command help, the input must already be sorted.
var indexCmd = &cobra.Command{
	Use:   "index --file [inputFile] --bin [binSize]",
	Short: "Create index, input file must already be sorted",
	Run: func(cmd *cobra.Command, args []string) {
		inputFile, err := cmd.Flags().GetString("file")
		if err != nil {
			fmt.Println("error reading --file flag:", err)
			return
		}
		binSize, err := cmd.Flags().GetInt("bin")
		if err != nil {
			fmt.Println("error reading --bin flag:", err)
			return
		}
		// binSize is used as a divisor below: zero would panic, and a
		// negative width has no meaning.
		if binSize <= 0 {
			fmt.Println("bin size must be a positive integer, got", binSize)
			return
		}

		file, err := os.Open(inputFile)
		if err != nil {
			if os.IsNotExist(err) {
				fmt.Println(inputFile, "file not found.")
			} else {
				fmt.Println("error opening input file:", err)
			}
			return
		}
		defer file.Close()

		// index[chr][binStart] = {startOffset, stopOffset} in bytes.
		index := make(map[string]map[int][]int64)

		// addRecord files one data line, occupying bytes [from, to) of the
		// input, under its (chr, bin). The first line seen for a bin fixes
		// the start offset; later lines only push the stop offset forward.
		addRecord := func(line string, from, to int64) error {
			// Only the first two columns are needed, so stop splitting early.
			fields := strings.SplitN(line, "\t", 3)
			if len(fields) < 2 {
				return fmt.Errorf("expected at least 2 tab-separated fields, got %d", len(fields))
			}
			start, err := strconv.Atoi(fields[1])
			if err != nil {
				return fmt.Errorf("invalid start position %q", fields[1])
			}
			chr := fields[0]
			binStart := (start / binSize) * binSize

			bins, ok := index[chr]
			if !ok {
				bins = make(map[int][]int64)
				index[chr] = bins
			}
			if span, ok := bins[binStart]; ok {
				span[1] = to
			} else {
				bins[binStart] = []int64{from, to}
			}
			return nil
		}

		// ReadString returns each line together with its terminator, so
		// len(raw) is the exact number of bytes consumed. That keeps the
		// offsets correct for CRLF files and for a final line without a
		// newline, and imposes no maximum line length.
		reader := bufio.NewReader(file)
		var lineStart, lineEnd int64
		lineNo := 0
		for {
			raw, readErr := reader.ReadString('\n')
			if len(raw) > 0 {
				lineNo++
				lineEnd += int64(len(raw))
				line := strings.TrimRight(raw, "\r\n")
				if line != "" && !strings.HasPrefix(line, "#") {
					if err := addRecord(line, lineStart, lineEnd); err != nil {
						fmt.Fprintf(os.Stderr, "skipping line %d: %v\n", lineNo, err)
					}
				}
				lineStart = lineEnd
			}
			if readErr != nil {
				if readErr != io.EOF {
					// Do not write an index that covers only part of the file.
					fmt.Println("error reading input file:", readErr)
					return
				}
				break
			}
		}

		indexFile := inputFile + ".idx"
		outFile, err := os.Create(indexFile)
		if err != nil {
			fmt.Println("error creating index file:", err)
			return
		}
		defer outFile.Close()

		// Buffer the many small writes; bufio.Writer remembers the first
		// write error and reports it from Flush.
		out := bufio.NewWriter(outFile)
		fmt.Fprintf(out, "#BIN\t%d\t%d\n", binSize, getFileSize(inputFile))

		// Map iteration order is random, so sort names (natural order) and
		// bin starts (numeric order) to make the index deterministic.
		chrs := make([]string, 0, len(index))
		for chr := range index {
			chrs = append(chrs, chr)
		}
		natsort.Sort(chrs)

		for _, chr := range chrs {
			regions := index[chr]
			keys := make([]int, 0, len(regions))
			for key := range regions {
				keys = append(keys, key)
			}
			sort.Ints(keys)
			for _, binStart := range keys {
				span := regions[binStart]
				fmt.Fprintf(out, "%s\t%d\t%d\t%d\n", chr, binStart, span[0], span[1])
			}
		}

		if err := out.Flush(); err != nil {
			fmt.Println("error writing index file:", err)
		}
	},
}
// getFileSize reports the size in bytes of the file at filename.
// It returns 0 when the file cannot be stat'ed.
func getFileSize(filename string) int64 {
	if info, statErr := os.Stat(filename); statErr == nil {
		return info.Size()
	}
	return 0
}