-
Notifications
You must be signed in to change notification settings - Fork 0
/
io.nim
217 lines (187 loc) · 6.14 KB
/
io.nim
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
import
std/[algorithm, options, sequtils, sets, strformat, strutils, sugar, tables],
relatives
proc read_tsv*(file: File): HashSet[Individual] =
#[Read from 3-columned TSV, where the columns are in the order child, sire, and dam.]#
var individuals: HashSet[Individual]
for line in lines(file):
if line.startsWith("#"):
continue
let split = line.split("\t")
let id = split[0]
let sire = split[1]
let dam = split[2]
# Create object for sire
var sire_obj: Option[Individual]
if sire == "":
sire_obj = none(Individual)
else:
sire_obj = some(Individual(
id: sire,
sire: none(Individual),
dam: none(Individual),
children: initHashSet[Individual](),
sex: male
))
if individuals.contains(sire_obj.get()):
# Update sex if already in HashSet
individuals[sire_obj.get()].sex = male
sire_obj = some(individuals[sire_obj.get()])
else:
individuals.incl(sire_obj.get())
# Create object for dam
var dam_obj: Option[Individual]
if dam == "":
dam_obj = none(Individual)
else:
dam_obj = some(Individual(
id: dam,
sire: none(Individual),
dam: none(Individual),
children: initHashSet[Individual](),
sex: female
))
if individuals.contains(dam_obj.get()):
# Update sex if already in HashSet
individuals[dam_obj.get()].sex = female
dam_obj = some(individuals[dam_obj.get()])
else:
individuals.incl(dam_obj.get())
# Create object for child
let child = Individual(
id: id,
sire: sire_obj,
dam: dam_obj,
children: initHashSet[Individual](),
sex: unknown
)
# Update existing record
if child in individuals:
individuals[child].sire = sire_obj
individuals[child].dam = dam_obj
# Else add new record
else:
individuals.incl(child)
# Add children to records
for individual in individuals:
if individual.sire.isSome:
individuals[individual.sire.get()].children.incl(individual)
if individual.dam.isSome:
individuals[individual.dam.get()].children.incl(individual)
return individuals
proc write_list*(individuals: HashSet[Individual]) =
#[Write individuals, one individual per line.]#
let sequence = individuals.toSeq().sorted(cmp=cmpIndividuals)
for indiv in sequence:
echo indiv.id
proc write_plink*(individuals: HashSet[Individual]) =
#[Write individuals to PLINK-style TSV.
Has five columns: family, child, sire, dam, sex, and affected status.
Family and affected status, however, are constant.]#
let sequence = individuals.toSeq().sorted(cmp=cmpIndividuals)
var included_individuals: HashSet[Individual]
# Set all animals to same family with unknown affected status
let
family = "1"
affected = "0"
var
sire_id: string
dam_id: string
sex: string
for indiv in sequence:
# Set parents and sex as missing initially
sire_id = "0"
dam_id = "0"
sex = "0"
if indiv.sire.isSome():
if indiv.sire.get() in sequence:
sire_id = indiv.sire.get().id
included_individuals.incl(indiv.sire.get())
if indiv.dam.isSome():
if indiv.dam.get() in sequence:
dam_id = indiv.dam.get().id
included_individuals.incl(indiv.dam.get())
case indiv.sex:
of male:
sex = "1"
of female:
sex = "2"
else:
sex = "0"
echo &"{family}\t{indiv.id}\t{sire_id}\t{dam_id}\t{sex}\t{affected}"
included_individuals.incl(indiv)
proc write_tsv*(individuals: HashSet[Individual]) =
#[Write individuals to TSV.]#
# Print only proband if it is the only relative.
if len(individuals) == 1:
for indiv in individuals:
echo &"{indiv.id}\t\t"
return
let sequence = individuals.toSeq().sorted(cmp=cmpIndividuals)
var included_individuals: HashSet[Individual]
for indiv in sequence:
var
sire_id: string
dam_id: string
if indiv.sire.isSome():
if indiv.sire.get() notin sequence:
sire_id = ""
else:
sire_id = indiv.sire.get().id
included_individuals.incl(indiv.sire.get())
else:
sire_id = ""
if indiv.dam.isSome():
if indiv.dam.get() notin sequence:
dam_id = ""
else:
dam_id = indiv.dam.get().id
included_individuals.incl(indiv.dam.get())
else:
dam_id = ""
# When parents are missing and is already listed as a parent of another, don't include
if sire_id == "" and dam_id == "":
block singleton:
for individual in individuals:
try:
if individual.sire.get() == indiv:
break singleton
except UnpackDefect:
discard
try:
if individual.dam.get() == indiv:
break singleton
except UnpackDefect:
discard
echo &"{indiv.id}\t\t"
else:
echo &"{indiv.id}\t{sire_id}\t{dam_id}"
included_individuals.incl(indiv)
proc write_matrix*(individuals: HashSet[Individual]) =
#[Write relationship coefficients as a matrix.]#
let sorted_individuals = individuals.toSeq().sorted(cmp=cmpIndividuals)
# Write header row
let header = sorted_individuals.map(indiv => indiv.id).join("\t")
echo &"\t{header}"
for indiv in sorted_individuals:
var coefficients = find_coefficients(indiv)
# Fill in all missing pairs
for indiv2 in sorted_individuals:
try:
discard coefficients[indiv2]
except KeyError:
# Report value as 0
coefficients[indiv2] = 0
let row = sorted_individuals.map(indiv => coefficients[indiv]).join("\t")
echo &"{indiv.id}\t{row}"
proc write_pairwise*(individuals: HashSet[Individual]) =
#[Write relationship coefficients as a pairwise TSV.]#
let sorted_individuals = individuals.toSeq().sorted(cmp=cmpIndividuals)
for indiv in sorted_individuals:
var coefficients = find_coefficients(indiv)
for indiv2 in sorted_individuals:
try:
echo &"{indiv.id}\t{indiv2.id}\t{coefficients[indiv2]}"
except KeyError:
# Report value as 0
echo &"{indiv.id}\t{indiv2.id}\t0"